зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1838161 - Add regular expression keywords for address-line1 and address-line2 r=credential-management-reviewers,issammani
Differential Revision: https://phabricator.services.mozilla.com/D180791
This commit is contained in:
Родитель
c466bdec21
Коммит
63eef94a13
|
@ -11,6 +11,7 @@ support-files =
|
|||
[browser_de_fields.js]
|
||||
[browser_fr_fields.js]
|
||||
[browser_ignore_invisible_fields.js]
|
||||
[browser_label_rules.js]
|
||||
[browser_multiple_section.js]
|
||||
[browser_parse_address_fields.js]
|
||||
[browser_parse_creditcard_expiry_fields.js]
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
/* Any copyright is dedicated to the Public Domain.
|
||||
http://creativecommons.org/publicdomain/zero/1.0/ */
|
||||
|
||||
/* global add_heuristic_tests */
|
||||
|
||||
"use strict";
|
||||
|
||||
add_heuristic_tests([
  {
    // The fixture uses the text "sender-address" twice: once as the <label>
    // of #test1 and once as the `name` attribute of #test2. The expected
    // result below lists a single "address-line1" field besides the two
    // autocomplete-driven ones.
    fixtureData: `
        <html>
        <body>
          <form>
            <input type="text" id="name" autocomplete="name"/>
            <input type="text" id="country" autocomplete="country"/>
            <label for="test1">sender-address</label>
            <input type="text" id="test1"/>
            <input type="text" id="test2" name="sender-address"/>
          </form>
        </body>
        </html>`,
    expectedResult: [
      {
        // Fields that do not state a reason inherit this one.
        default: {
          reason: "regex-heuristic",
        },
        description: `Only "sender-address" keywords in labels`,
        fields: [
          { fieldName: "name", reason: "autocomplete" },
          { fieldName: "country", reason: "autocomplete" },
          { fieldName: "address-line1" },
        ],
      },
    ],
  },
]);
|
|
@ -26,6 +26,12 @@ add_heuristic_tests(
|
|||
{ fieldName: "email" },
|
||||
],
|
||||
},
|
||||
{
|
||||
invalid: true,
|
||||
fields: [
|
||||
{ fieldName: "address-line1", reason:"regex-heuristic" },
|
||||
],
|
||||
},
|
||||
{
|
||||
default: {
|
||||
reason: "regex-heuristic",
|
||||
|
@ -45,6 +51,12 @@ add_heuristic_tests(
|
|||
{ fieldName: "email" },
|
||||
],
|
||||
},
|
||||
{
|
||||
invalid: true,
|
||||
fields: [
|
||||
{ fieldName: "address-line1", reason:"regex-heuristic" },
|
||||
],
|
||||
},
|
||||
{
|
||||
invalid: true,
|
||||
fields: [
|
||||
|
@ -94,6 +106,12 @@ add_heuristic_tests(
|
|||
{ fieldName: "email" },
|
||||
],
|
||||
},
|
||||
{
|
||||
invalid: true,
|
||||
fields: [
|
||||
{ fieldName: "address-line1", reason:"regex-heuristic" },
|
||||
],
|
||||
},
|
||||
{
|
||||
default: {
|
||||
reason: "regex-heuristic",
|
||||
|
@ -113,6 +131,12 @@ add_heuristic_tests(
|
|||
{ fieldName: "email" },
|
||||
],
|
||||
},
|
||||
{
|
||||
invalid: true,
|
||||
fields: [
|
||||
{ fieldName: "address-line1", reason:"regex-heuristic" },
|
||||
],
|
||||
},
|
||||
{
|
||||
invalid: true,
|
||||
fields: [
|
||||
|
|
|
@ -88,6 +88,7 @@ export class FormSection {
|
|||
*/
|
||||
export const FormAutofillHeuristics = {
|
||||
RULES: HeuristicsRegExp.getRules(),
|
||||
LABEL_RULES: HeuristicsRegExp.getLabelRules(),
|
||||
|
||||
CREDIT_CARD_FIELDNAMES: [],
|
||||
ADDRESS_FIELDNAMES: [],
|
||||
|
@ -260,7 +261,7 @@ export const FormAutofillHeuristics = {
|
|||
) {
|
||||
const regExpTelExtension = new RegExp(
|
||||
"\\bext|ext\\b|extension|ramal", // pt-BR, pt-PT
|
||||
"iu"
|
||||
"iug"
|
||||
);
|
||||
if (this._matchRegexp(field.element, regExpTelExtension)) {
|
||||
scanner.updateFieldName(scanner.parsingIndex, "tel-extension");
|
||||
|
@ -872,15 +873,21 @@ export const FormAutofillHeuristics = {
|
|||
* Extract all the signature strings of an element.
|
||||
*
|
||||
* @param {HTMLElement} element
|
||||
* @returns {ElementStrings}
|
||||
* @returns {Array<string>}
|
||||
*/
|
||||
_getElementStrings(element) {
|
||||
return [element.id, element.name, element.placeholder?.trim()];
|
||||
},
|
||||
|
||||
/**
|
||||
* Extract all the label strings associated with an element.
|
||||
*
|
||||
* @param {HTMLElement} element
|
||||
* @returns {ElementStrings}
|
||||
*/
|
||||
_getElementLabelStrings(element) {
|
||||
return {
|
||||
*[Symbol.iterator]() {
|
||||
yield element.id;
|
||||
yield element.name;
|
||||
yield element.placeholder?.trim();
|
||||
|
||||
const labels = lazy.LabelUtils.findLabelElements(element);
|
||||
for (let label of labels) {
|
||||
yield* lazy.LabelUtils.extractLabelStrings(label);
|
||||
|
@ -912,46 +919,75 @@ export const FormAutofillHeuristics = {
|
|||
},
|
||||
|
||||
/**
|
||||
* Find the first matched field name of the element wih given regex list.
|
||||
* Find the first matching field name from a given list of field names
|
||||
* that matches an HTML element.
|
||||
*
|
||||
* @param {HTMLElement} element
|
||||
* @param {Array<string>} regexps
|
||||
* The regex key names that correspond to pattern in the rule list. It will
|
||||
* be matched against the element string converted to lower case.
|
||||
* @returns {?string} The first matched field name
|
||||
* The function first tries to match the element against a set of
|
||||
* pre-defined regular expression rules. If no match is found, it
|
||||
* then checks for label-specific rules, if they exist.
|
||||
*
|
||||
* Note: For label rules, the keyword is often more general
|
||||
* (e.g., "^\\W*address"), hence they are only searched within labels
|
||||
* to reduce the occurrence of false positives.
|
||||
*
|
||||
* @param {HTMLElement} element The element to match.
|
||||
* @param {Array<string>} fieldNames An array of field names to compare against.
|
||||
* @returns {string|null} The name of the matched field, or null if no match was found.
|
||||
*/
|
||||
_findMatchedFieldName(element, regexps) {
|
||||
if (!regexps.length) {
|
||||
_findMatchedFieldName(element, fieldNames) {
|
||||
if (!fieldNames.length) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const getElementStrings = this._getElementStrings(element);
|
||||
for (let regexp of regexps) {
|
||||
for (let string of getElementStrings) {
|
||||
if (this.testRegex(this.RULES[regexp], string?.toLowerCase())) {
|
||||
return regexp;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Attempt to match the element against the default set of rules
|
||||
let matchedFieldName = fieldNames.find(fieldName =>
|
||||
this._matchRegexp(element, this.RULES[fieldName])
|
||||
);
|
||||
|
||||
return null;
|
||||
// If no match is found, and if a label rule exists for the field,
|
||||
// attempt to match against the label rules
|
||||
if (!matchedFieldName) {
|
||||
matchedFieldName = fieldNames.find(fieldName => {
|
||||
const regexp = this.LABEL_RULES[fieldName];
|
||||
return this._matchRegexp(element, regexp, { attribute: false });
|
||||
});
|
||||
}
|
||||
return matchedFieldName;
|
||||
},
|
||||
|
||||
/**
|
||||
* Determine whether the regexp can match any of element strings.
|
||||
*
|
||||
* @param {HTMLElement} element
|
||||
* @param {RegExp} regexp
|
||||
*
|
||||
* @returns {boolean}
|
||||
* @param {HTMLElement} element The HTML element to match.
|
||||
* @param {RegExp} regexp The regular expression to match against.
|
||||
* @param {object} [options] Optional parameters for matching.
|
||||
* @param {boolean} [options.attribute=true]
|
||||
* Whether to match against the element's attributes.
|
||||
* @param {boolean} [options.label=true]
|
||||
* Whether to match against the element's labels.
|
||||
* @returns {boolean} True if a match is found, otherwise false.
|
||||
*/
|
||||
_matchRegexp(element, regexp) {
|
||||
const elemStrings = this._getElementStrings(element);
|
||||
for (const str of elemStrings) {
|
||||
if (regexp.test(str)) {
|
||||
_matchRegexp(element, regexp, { attribute = true, label = true } = {}) {
|
||||
if (!regexp) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (attribute) {
|
||||
const elemStrings = this._getElementStrings(element);
|
||||
if (elemStrings.find(s => this.testRegex(regexp, s?.toLowerCase()))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (label) {
|
||||
const elementLabelStrings = this._getElementLabelStrings(element);
|
||||
for (const s of elementLabelStrings) {
|
||||
if (this.testRegex(regexp, s?.toLowerCase())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
},
|
||||
|
||||
|
|
|
@ -33,6 +33,12 @@ export const HeuristicsRegExp = {
|
|||
"cc-type": undefined,
|
||||
},
|
||||
|
||||
// regular expressions that only apply to label
|
||||
LABEL_RULES: {
|
||||
"address-line1": undefined,
|
||||
"address-line2": undefined,
|
||||
},
|
||||
|
||||
RULE_SETS: [
|
||||
//=========================================================================
|
||||
// Firefox-specific rules
|
||||
|
@ -596,35 +602,76 @@ export const HeuristicsRegExp = {
|
|||
},
|
||||
],
|
||||
|
||||
_getRule(name) {
|
||||
let rules = [];
|
||||
this.RULE_SETS.forEach(set => {
|
||||
if (set[name]) {
|
||||
// Add the rule.
|
||||
// We make the regex lower case so that we can match it against the
|
||||
// lower-cased field name and get a rough equivalent of a case-insensitive
|
||||
// match. This avoids a performance cliff with the "iu" flag on regular
|
||||
// expressions.
|
||||
rules.push(`(${set[name].toLowerCase()})`.normalize("NFKC"));
|
||||
}
|
||||
});
|
||||
LABEL_RULE_SETS: [
|
||||
{
|
||||
"address-line1":
|
||||
"(^\\W*address)" +
|
||||
"|(address\\W*$)" +
|
||||
"|(?:shipping|billing|mailing|pick.?up|drop.?off|delivery|sender|postal|" +
|
||||
"recipient|home|work|office|school|business|mail)[\\s\\-]+address" +
|
||||
"|address\\s+(of|for|to|from)" +
|
||||
"|adresse" + // fr-FR
|
||||
"|indirizzo" + // it-IT
|
||||
"|住所" + // ja-JP
|
||||
"|地址" + // zh-CN
|
||||
"|(\\b|_)adres(?! tarifi)(\\b|_)" + // tr
|
||||
"|주소" + // ko-KR
|
||||
"|^alamat" + // id
|
||||
// Should contain street and any other address component, in any order
|
||||
"|street.*(house|building|apartment|floor)" + // en
|
||||
"|(house|building|apartment|floor).*street" +
|
||||
"|(sokak|cadde).*(apartman|bina|daire|mahalle)" + // tr
|
||||
"|(apartman|bina|daire|mahalle).*(sokak|cadde)" +
|
||||
"|улиц.*(дом|корпус|квартир|этаж)|(дом|корпус|квартир|этаж).*улиц", // ru
|
||||
},
|
||||
{
|
||||
"address-line2":
|
||||
"address|line" +
|
||||
"|adresse" + // fr-FR
|
||||
"|indirizzo" + // it-IT
|
||||
"|地址" + // zh-CN
|
||||
"|주소", // ko-KR
|
||||
},
|
||||
],
|
||||
|
||||
const value = new RegExp(rules.join("|"), "gu");
|
||||
Object.defineProperty(this.RULES, name, { get: undefined });
|
||||
Object.defineProperty(this.RULES, name, { value });
|
||||
return value;
|
||||
},
|
||||
_getRules(rules, rulesets) {
|
||||
function computeRule(name) {
|
||||
let regexps = [];
|
||||
rulesets.forEach(set => {
|
||||
if (set[name]) {
|
||||
// Add the rule.
|
||||
// We make the regex lower case so that we can match it against the
|
||||
// lower-cased field name and get a rough equivalent of a case-insensitive
|
||||
// match. This avoids a performance cliff with the "iu" flag on regular
|
||||
// expressions.
|
||||
regexps.push(`(${set[name].toLowerCase()})`.normalize("NFKC"));
|
||||
}
|
||||
});
|
||||
|
||||
getRules() {
|
||||
Object.keys(this.RULES).forEach(field =>
|
||||
Object.defineProperty(this.RULES, field, {
|
||||
const value = new RegExp(regexps.join("|"), "gu");
|
||||
|
||||
Object.defineProperty(rules, name, { get: undefined });
|
||||
Object.defineProperty(rules, name, { value });
|
||||
return value;
|
||||
}
|
||||
|
||||
Object.keys(rules).forEach(field =>
|
||||
Object.defineProperty(rules, field, {
|
||||
get() {
|
||||
return HeuristicsRegExp._getRule(field);
|
||||
return computeRule(field);
|
||||
},
|
||||
})
|
||||
);
|
||||
|
||||
return this.RULES;
|
||||
return rules;
|
||||
},
|
||||
|
||||
getLabelRules() {
|
||||
return this._getRules(this.LABEL_RULES, this.LABEL_RULE_SETS);
|
||||
},
|
||||
|
||||
getRules() {
|
||||
return this._getRules(this.RULES, this.RULE_SETS);
|
||||
},
|
||||
};
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче