зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1348751 - [Form Autofill] A utility library for handling full name and separated names, r=MattN
MozReview-Commit-ID: 3rcuxbFHKOq --HG-- extra : rebase_source : 3b99534f8539f2e5425bbc1d543ababb2a3ee6e1
This commit is contained in:
Родитель
274b2c935b
Коммит
38318a7f55
|
@ -0,0 +1,280 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
"use strict";
|
||||
|
||||
const {classes: Cc, interfaces: Ci, utils: Cu, results: Cr} = Components;
|
||||
|
||||
// Cu.import loads jsm files based on ISO-Latin-1 for now (see bug 530257).
|
||||
// However, the references about name parts include multi-byte characters.
|
||||
// Thus, we use |loadSubScript| to load the references instead.
|
||||
const NAME_REFERENCES = "chrome://formautofill/content/nameReferences.js";
|
||||
|
||||
this.EXPORTED_SYMBOLS = ["FormAutofillNameUtils"];
|
||||
|
||||
// FormAutofillNameUtils is initially translated from
|
||||
// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util.cc?rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
|
||||
var FormAutofillNameUtils = {
  // Reference lists. They start out empty and are overwritten by init() with
  // the arrays of the same names loaded from NAME_REFERENCES.
  NAME_PREFIXES: [],
  NAME_SUFFIXES: [],
  FAMILY_NAME_PREFIXES: [],
  COMMON_CJK_MULTI_CHAR_SURNAMES: [],
  KOREAN_MULTI_CHAR_SURNAMES: [],

  // The whitespace definition based on
  // https://cs.chromium.org/chromium/src/base/strings/string_util_constants.cc?l=9&rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
  WHITESPACE: [
    "\u0009", // CHARACTER TABULATION
    "\u000A", // LINE FEED (LF)
    "\u000B", // LINE TABULATION
    "\u000C", // FORM FEED (FF)
    "\u000D", // CARRIAGE RETURN (CR)
    "\u0020", // SPACE
    "\u0085", // NEXT LINE (NEL)
    "\u00A0", // NO-BREAK SPACE
    "\u1680", // OGHAM SPACE MARK
    "\u2000", // EN QUAD
    "\u2001", // EM QUAD
    "\u2002", // EN SPACE
    "\u2003", // EM SPACE
    "\u2004", // THREE-PER-EM SPACE
    "\u2005", // FOUR-PER-EM SPACE
    "\u2006", // SIX-PER-EM SPACE
    "\u2007", // FIGURE SPACE
    "\u2008", // PUNCTUATION SPACE
    "\u2009", // THIN SPACE
    "\u200A", // HAIR SPACE
    "\u2028", // LINE SEPARATOR
    "\u2029", // PARAGRAPH SEPARATOR
    "\u202F", // NARROW NO-BREAK SPACE
    "\u205F", // MEDIUM MATHEMATICAL SPACE
    "\u3000", // IDEOGRAPHIC SPACE
  ],

  // The middle dot is used as a separator for foreign names in Japanese.
  MIDDLE_DOT: [
    "\u30FB", // KATAKANA MIDDLE DOT
    "\u00B7", // A (common?) typo for "KATAKANA MIDDLE DOT"
  ],

  // Character-class fragments (each is "first-last") that init() joins into
  // the |reCJK| regular expression. The Unicode ranges are based on Wiki:
  // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
  // https://en.wikipedia.org/wiki/Hangul
  // https://en.wikipedia.org/wiki/Japanese_writing_system
  CJK_RANGE: [
    "\u1100-\u11FF", // Hangul Jamo
    "\u3040-\u309F", // Hiragana
    "\u30A0-\u30FF", // Katakana
    "\u3105-\u312C", // Bopomofo
    "\u3130-\u318F", // Hangul Compatibility Jamo
    "\u31F0-\u31FF", // Katakana Phonetic Extensions
    "\u3200-\u32FF", // Enclosed CJK Letters and Months
    "\u3400-\u4DBF", // CJK unified ideographs Extension A
    "\u4E00-\u9FFF", // CJK Unified Ideographs
    "\uA960-\uA97F", // Hangul Jamo Extended-A
    "\uAC00-\uD7AF", // Hangul Syllables
    "\uD7B0-\uD7FF", // Hangul Jamo Extended-B
    "\uFF00-\uFFEF", // Halfwidth and Fullwidth Forms
  ],

  // Same fragment format as CJK_RANGE, restricted to the Hangul blocks. Used
  // by _splitCJKName() to decide whether a name is written in Hangul.
  HANGUL_RANGE: [
    "\u1100-\u11FF", // Hangul Jamo
    "\u3130-\u318F", // Hangul Compatibility Jamo
    "\uA960-\uA97F", // Hangul Jamo Extended-A
    "\uAC00-\uD7AF", // Hangul Syllables
    "\uD7B0-\uD7FF", // Hangul Jamo Extended-B
  ],

  // Set to true by init() once the reference lists have been loaded.
  _dataLoaded: false,

  /**
   * Returns true if |set| contains |token|, modulo a final period and
   * letter case.
   *
   * @param {string[]} set   Candidate strings; compared against the
   *                         lower-cased token, so entries are presumably
   *                         lower case (the lists in nameReferences.js are).
   * @param {string} token   The name token to look up.
   * @returns {boolean}
   */
  _containsString(set, token) {
    let target = token.replace(/\.$/, "").toLowerCase();
    return set.includes(target);
  },

  /**
   * Removes common name prefixes from the front of |nameTokens|.
   *
   * @param {string[]} nameTokens
   * @returns {string[]} A new array starting at the first token that is not a
   *                     known prefix, or [] if every token is a prefix.
   */
  _stripPrefixes(nameTokens) {
    // findIndex() yields a numeric index; the previous for...in loop iterated
    // string keys and would also visit inherited enumerable properties.
    let firstNonPrefix = nameTokens.findIndex(
      token => !this._containsString(this.NAME_PREFIXES, token));
    if (firstNonPrefix == -1) {
      return [];
    }
    return nameTokens.slice(firstNonPrefix);
  },

  /**
   * Removes common name suffixes from the end of |nameTokens|.
   *
   * @param {string[]} nameTokens
   * @returns {string[]} A new array ending at the last token that is not a
   *                     known suffix, or [] if every token is a suffix.
   */
  _stripSuffixes(nameTokens) {
    for (let i = nameTokens.length - 1; i >= 0; i--) {
      if (!this._containsString(this.NAME_SUFFIXES, nameTokens[i])) {
        return nameTokens.slice(0, i + 1);
      }
    }
    return [];
  },

  /**
   * Tells whether |name| looks like a CJK name.
   *
   * The name is considered to be a CJK name if it is only CJK characters,
   * spaces, and "middle dot" separators, with at least one CJK character, and
   * no more than 2 words.
   *
   * Chinese and Japanese names are usually spelled out using the Han
   * characters (logographs), which constitute the "CJK Unified Ideographs"
   * block in Unicode, also referred to as Unihan. Korean names are usually
   * spelled out in the Korean alphabet (Hangul), although they do have a Han
   * equivalent as well.
   *
   * Requires |this.reCJK|, which is created by init().
   *
   * @param {string} name
   * @returns {boolean}
   */
  _isCJKName(name) {
    let previousWasCJK = false;
    let wordCount = 0;

    // for...of walks the string by code point.
    for (let c of name) {
      let isMiddleDot = this.MIDDLE_DOT.includes(c);
      // KATAKANA MIDDLE DOT lies inside the Katakana range, so separators are
      // excluded explicitly before consulting the regular expression.
      let isCJK = !isMiddleDot && this.reCJK.test(c);
      if (!isCJK && !isMiddleDot && !this.WHITESPACE.includes(c)) {
        return false;
      }
      // A word starts at every transition from non-CJK to CJK.
      if (isCJK && !previousWasCJK) {
        wordCount++;
      }
      previousWasCJK = isCJK;
    }

    return wordCount > 0 && wordCount < 3;
  },

  /**
   * Tries to split a Chinese, Japanese, or Korean name into its given name &
   * surname parts.
   *
   * The convention for CJK languages is to put the surname (last name) first,
   * and the given name (first name) second. In a continuous text, there is
   * normally no space between the two parts of the name. When entering their
   * name into a field, though, some people add a space to disambiguate. CJK
   * names (almost) never have a middle name.
   *
   * @param {string[]} nameTokens
   * @returns {?{given: string, middle: string, family: string}}
   *          The name parts (|middle| is always ""), or null unless there are
   *          exactly one or two tokens.
   */
  _splitCJKName(nameTokens) {
    let nameParts = {
      given: "",
      middle: "",
      family: "",
    };

    if (nameTokens.length == 2) {
      // The user entered a space between the two name parts. This makes our
      // job easier. Family name first, given name second.
      nameParts.family = nameTokens[0];
      nameParts.given = nameTokens[1];
      return nameParts;
    }

    if (nameTokens.length != 1) {
      return null;
    }

    // There is no space between the surname and given name. Try to infer
    // where to separate between the two. Most Chinese and Korean surnames
    // have only one character, but there are a few that have 2. If the name
    // does not start with a surname from a known list, default to one
    // character.
    let name = nameTokens[0];
    // Only needed for the single-token case, so it is built here rather than
    // on every call.
    let reHangulName = new RegExp(
      "^[" + this.HANGUL_RANGE.join("") + this.WHITESPACE.join("") + "]+$", "u");
    let isKorean = reHangulName.test(name);

    // 4-character Korean names are more likely to be 2/2 than 1/3, so use
    // the full list of Korean 2-char surnames. (instead of only the common
    // ones)
    let multiCharSurnames = (isKorean && name.length > 3) ?
      this.KOREAN_MULTI_CHAR_SURNAMES :
      this.COMMON_CJK_MULTI_CHAR_SURNAMES;

    // Use the length of the matching known surname; default to 1 character if
    // the name does not start with any surname in the list.
    let knownSurname = multiCharSurnames.find(surname => name.startsWith(surname));
    let surnameLength = knownSurname ? knownSurname.length : 1;

    nameParts.family = name.slice(0, surnameLength);
    nameParts.given = name.slice(surnameLength);
    return nameParts;
  },

  /**
   * Loads the reference lists from NAME_REFERENCES into this object and
   * builds |reCJK|. Calls after the first successful one are no-ops.
   */
  init() {
    if (this._dataLoaded) {
      return;
    }
    let sandbox = {};
    let scriptLoader = Cc["@mozilla.org/moz/jssubscript-loader;1"]
                         .getService(Ci.mozIJSSubScriptLoader);
    scriptLoader.loadSubScript(NAME_REFERENCES, sandbox, "utf-8");
    Object.assign(this, sandbox.nameReferences);

    this.reCJK = new RegExp("[" + this.CJK_RANGE.join("") + "]", "u");

    // Flag success only once everything above has completed, so that a
    // failure part-way through does not turn later init() calls into no-ops.
    this._dataLoaded = true;
  },

  /**
   * Splits a full name into given, middle and family names.
   *
   * @param {string} name  The full name as typed by the user.
   * @returns {{given: string, middle: string, family: string}}
   *          Parts that could not be determined are empty strings.
   */
  splitName(name) {
    // A leading or trailing separator that trim() does not remove (a comma or
    // a middle dot) makes split() produce empty tokens. Drop them, otherwise
    // "John Smith," would end up with an empty family name.
    let nameTokens = name.trim()
                         .split(/[ ,\u3000\u30FB\u00B7]+/)
                         .filter(token => token.length);
    let nameParts = {
      given: "",
      middle: "",
      family: "",
    };

    nameTokens = this._stripPrefixes(nameTokens);

    if (this._isCJKName(name)) {
      let parts = this._splitCJKName(nameTokens);
      if (parts) {
        return parts;
      }
    }

    // Don't assume "Ma" is a suffix in John Ma.
    if (nameTokens.length > 2) {
      nameTokens = this._stripSuffixes(nameTokens);
    }

    if (!nameTokens.length) {
      // Bad things have happened; just assume the whole thing is a given name.
      // Whitespace-only input keeps producing an empty given name.
      nameParts.given = name.trim() ? name : "";
      return nameParts;
    }

    // Only one token, assume given name.
    if (nameTokens.length == 1) {
      nameParts.given = nameTokens[0];
      return nameParts;
    }

    // 2 or more tokens. Grab the family, which is the last word plus any
    // recognizable family prefixes.
    let familyTokens = [nameTokens.pop()];
    while (nameTokens.length) {
      let lastToken = nameTokens[nameTokens.length - 1];
      if (!this._containsString(this.FAMILY_NAME_PREFIXES, lastToken)) {
        break;
      }
      familyTokens.unshift(lastToken);
      nameTokens.pop();
    }
    nameParts.family = familyTokens.join(" ");

    // Take the last remaining token as the middle name (if there are at least 2
    // tokens).
    if (nameTokens.length >= 2) {
      nameParts.middle = nameTokens.pop();
    }

    // Remainder is given name.
    nameParts.given = nameTokens.join(" ");

    return nameParts;
  },

  /**
   * Builds a full name from its parts.
   *
   * When both the given and family names are CJK names and there is no middle
   * name, the result is the family name immediately followed by the given
   * name. Otherwise the non-empty parts are joined with a space in
   * given-middle-family order.
   *
   * @param {Object} nameParts
   * @param {string} [nameParts.given]
   * @param {string} [nameParts.middle]
   * @param {string} [nameParts.family]
   * @returns {string}
   */
  joinNameParts({given, middle, family}) {
    // Treat absent (undefined/null) parts as empty so that a record without,
    // say, a middle name neither throws nor defeats the CJK branch below.
    given = given || "";
    middle = middle || "";
    family = family || "";

    if (this._isCJKName(given) && this._isCJKName(family) && middle == "") {
      return family + given;
    }
    return [given, middle, family].filter(part => part && part.length).join(" ");
  },
};
|
||||
|
||||
FormAutofillNameUtils.init();
|
|
@ -0,0 +1,144 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/* exported nameReferences */
|
||||
|
||||
"use strict";
|
||||
|
||||
// The data below is initially copied from
|
||||
// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util.cc?rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
|
||||
var nameReferences = {
  // Titles and ranks that may precede a name.
  NAME_PREFIXES: [
    "1lt", "1st", "2lt", "2nd", "3rd",
    "admiral",
    "capt", "captain", "col", "cpt",
    "dr",
    "gen", "general",
    "lcdr", "lt", "ltc", "ltg", "ltjg",
    "maj", "major", "mg", "mr", "mrs", "ms",
    "pastor", "prof",
    "rep", "reverend", "rev",
    "sen", "st",
  ],

  // Degrees, generational markers and roman numerals that may follow a name.
  NAME_SUFFIXES: [
    "b.a", "ba",
    "d.d.s", "dds",
    "i", "ii", "iii", "iv", "ix",
    "jr",
    "m.a", "m.d", "ma", "md", "ms",
    "ph.d", "phd",
    "sr",
    "v", "vi", "vii", "viii",
    "x",
  ],

  // Particles that belong to the family name rather than to a middle name.
  FAMILY_NAME_PREFIXES: [
    "d'", "de", "del", "der", "di",
    "la", "le",
    "mc",
    "san", "st",
    "ter",
    "van", "von",
  ],

  // The common and non-ambiguous CJK surnames (last names) that have more than
  // one character.
  COMMON_CJK_MULTI_CHAR_SURNAMES: [
    // Korean, taken from the list of surnames:
    // https://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EC%84%B1%EC%94%A8_%EB%AA%A9%EB%A1%9D
    "남궁", "사공", "서문", "선우", "제갈", "황보", "독고", "망절",

    // Chinese, taken from the top 10 Chinese 2-character surnames:
    // https://zh.wikipedia.org/wiki/%E8%A4%87%E5%A7%93#.E5.B8.B8.E8.A6.8B.E7.9A.84.E8.A4.87.E5.A7.93
    // Simplified Chinese (mostly mainland China)
    "欧阳", "令狐", "皇甫", "上官", "司徒", "诸葛", "司马", "宇文", "呼延", "端木",
    // Traditional Chinese (mostly Taiwan)
    "張簡", "歐陽", "諸葛", "申屠", "尉遲", "司馬", "軒轅", "夏侯",
  ],

  // All Korean surnames that have more than one character, even the
  // rare/ambiguous ones.
  KOREAN_MULTI_CHAR_SURNAMES: [
    "강전", "남궁", "독고", "동방", "망절", "사공", "서문",
    "선우", "소봉", "어금", "장곡", "제갈", "황목", "황보",
  ],
};
|
|
@ -0,0 +1,76 @@
|
|||
/**
|
||||
* Tests the "isCJKName" function of FormAutofillNameUtils object.
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
Cu.import("resource://gre/modules/Task.jsm");
|
||||
Cu.import("resource://formautofill/FormAutofillNameUtils.jsm");
|
||||
|
||||
// Test cases are initially copied from
|
||||
// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util_unittest.cc
|
||||
// Each entry pairs a full name with the result expected from _isCJKName().
// Separator characters that are easy to confuse visually are written as
// escapes.
const TESTCASES = [
  // Non-CJK language with only ASCII characters.
  {fullName: "Homer Jay Simpson", expectedResult: false},
  // Non-CJK language with some ASCII characters.
  {fullName: "Éloïse Paré", expectedResult: false},
  // Non-CJK language with no ASCII characters.
  {fullName: "Σωκράτης", expectedResult: false},
  // (Simplified) Chinese name, Unihan.
  {fullName: "刘翔", expectedResult: true},
  // (Simplified) Chinese name, Unihan, with an ASCII space.
  {fullName: "成 龙", expectedResult: true},
  // Korean name, Hangul.
  {fullName: "송지효", expectedResult: true},
  // Korean name, Hangul, with an 'IDEOGRAPHIC SPACE' (U+3000).
  {fullName: "김\u3000종국", expectedResult: true},
  // Japanese name, Unihan.
  {fullName: "山田貴洋", expectedResult: true},
  // Japanese name, Katakana, with a 'KATAKANA MIDDLE DOT' (U+30FB).
  {fullName: "ビル\u30FBゲイツ", expectedResult: true},
  // Japanese name, Katakana, with a 'MIDDLE DOT' (U+00B7) (likely a typo).
  {fullName: "ビル\u00B7ゲイツ", expectedResult: true},
  // CJK names don't have a middle name, so a 3-part name is bogus to us.
  {fullName: "반 기 문", expectedResult: false},
];
|
||||
|
||||
// Runs every entry of TESTCASES through _isCJKName() and compares the outcome
// with the expected boolean.
add_task(function* test_isCJKName() {
  for (let {fullName, expectedResult} of TESTCASES) {
    do_print("Starting testcase: " + fullName);
    do_check_eq(FormAutofillNameUtils._isCJKName(fullName), expectedResult);
  }
});
|
|
@ -0,0 +1,285 @@
|
|||
/**
|
||||
* Tests FormAutofillNameUtils object.
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
Cu.import("resource://gre/modules/Task.jsm");
|
||||
Cu.import("resource://formautofill/FormAutofillNameUtils.jsm");
|
||||
|
||||
// Test cases initially copied from
|
||||
// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util_unittest.cc
|
||||
// Each entry describes one full name and the parts it is expected to split
// into. |expectedFullName| is given only when joining the parts back together
// does not reproduce |fullName| exactly. Separator characters that are easy
// to confuse visually are written as escapes.
const TESTCASES = [
  {
    description: "Full name including given, middle and family names",
    fullName: "Homer Jay Simpson",
    nameParts: {given: "Homer", middle: "Jay", family: "Simpson"},
  },
  {
    description: "No middle name",
    fullName: "Moe Szyslak",
    nameParts: {given: "Moe", middle: "", family: "Szyslak"},
  },
  {
    description: "Common name prefixes removed",
    fullName: "Reverend Timothy Lovejoy",
    nameParts: {given: "Timothy", middle: "", family: "Lovejoy"},
    expectedFullName: "Timothy Lovejoy",
  },
  {
    description: "Common name suffixes removed",
    fullName: "John Frink Phd",
    nameParts: {given: "John", middle: "", family: "Frink"},
    expectedFullName: "John Frink",
  },
  {
    description: "Exception to the name suffix removal",
    fullName: "John Ma",
    nameParts: {given: "John", middle: "", family: "Ma"},
  },
  {
    description: "Common family name prefixes not considered a middle name",
    fullName: "Milhouse Van Houten",
    nameParts: {given: "Milhouse", middle: "", family: "Van Houten"},
  },

  // CJK names have reverse order (surname goes first, given name goes second).
  {
    description: "Chinese name, Unihan",
    fullName: "孫 德明",
    nameParts: {given: "德明", middle: "", family: "孫"},
    expectedFullName: "孫德明",
  },
  {
    description: "Chinese name, Unihan, \"IDEOGRAPHIC SPACE\"",
    fullName: "孫\u3000德明",
    nameParts: {given: "德明", middle: "", family: "孫"},
    expectedFullName: "孫德明",
  },
  {
    description: "Korean name, Hangul",
    fullName: "홍 길동",
    nameParts: {given: "길동", middle: "", family: "홍"},
    expectedFullName: "홍길동",
  },
  {
    description: "Japanese name, Unihan",
    fullName: "山田 貴洋",
    nameParts: {given: "貴洋", middle: "", family: "山田"},
    expectedFullName: "山田貴洋",
  },

  // In Japanese, foreign names use 'KATAKANA MIDDLE DOT' (U+30FB) as a
  // separator. There is no consensus for the ordering. For now, we use the same
  // ordering as regular Japanese names ("last・first").
  {
    description: "Foreign name in Japanese, Katakana",
    fullName: "ゲイツ\u30FBビル",
    nameParts: {given: "ビル", middle: "", family: "ゲイツ"},
    expectedFullName: "ゲイツビル",
  },

  // 'KATAKANA MIDDLE DOT' is occasionally typoed as 'MIDDLE DOT' (U+00B7).
  {
    description: "Foreign name in Japanese, Katakana, \"MIDDLE DOT\"",
    fullName: "ゲイツ\u00B7ビル",
    nameParts: {given: "ビル", middle: "", family: "ゲイツ"},
    expectedFullName: "ゲイツビル",
  },

  // CJK names don't usually have a space in the middle, but most of the time,
  // the surname is only one character (in Chinese & Korean).
  {
    description: "Korean name, Hangul",
    fullName: "최성훈",
    nameParts: {given: "성훈", middle: "", family: "최"},
  },
  {
    description: "(Simplified) Chinese name, Unihan",
    fullName: "刘翔",
    nameParts: {given: "翔", middle: "", family: "刘"},
  },
  {
    description: "(Traditional) Chinese name, Unihan",
    fullName: "劉翔",
    nameParts: {given: "翔", middle: "", family: "劉"},
  },

  // There are a few exceptions. Occasionally, the surname has two characters.
  {
    description: "Korean name, Hangul",
    fullName: "남궁도",
    nameParts: {given: "도", middle: "", family: "남궁"},
  },
  {
    description: "Korean name, Hangul",
    fullName: "황보혜정",
    nameParts: {given: "혜정", middle: "", family: "황보"},
  },
  {
    description: "(Traditional) Chinese name, Unihan",
    fullName: "歐陽靖",
    nameParts: {given: "靖", middle: "", family: "歐陽"},
  },

  // In Korean, some 2-character surnames are rare/ambiguous, like "강전": "강"
  // is a common surname, and "전" can be part of a given name. In those cases,
  // we assume it's 1/2 for 3-character names, or 2/2 for 4-character names.
  {
    description: "Korean name, Hangul",
    fullName: "강전희",
    nameParts: {given: "전희", middle: "", family: "강"},
  },
  {
    description: "Korean name, Hangul",
    fullName: "황목치승",
    nameParts: {given: "치승", middle: "", family: "황목"},
  },

  // It occasionally happens that a full name is 2 characters, 1/1.
  {
    description: "Korean name, Hangul",
    fullName: "이도",
    nameParts: {given: "도", middle: "", family: "이"},
  },
  {
    // This name is written in Han characters, not Hangul.
    description: "Chinese name, Unihan",
    fullName: "孫文",
    nameParts: {given: "文", middle: "", family: "孫"},
  },

  // These are not CJK names for us; they're just bogus.
  {
    description: "Bogus",
    fullName: "Homer シンプソン",
    nameParts: {given: "Homer", middle: "", family: "シンプソン"},
  },
  {
    description: "Bogus",
    fullName: "ホーマー Simpson",
    nameParts: {given: "ホーマー", middle: "", family: "Simpson"},
  },
  {
    description: "CJK has a middle-name, too unusual",
    fullName: "반 기 문",
    nameParts: {given: "반", middle: "기", family: "문"},
  },
];
|
||||
|
||||
// Splits every test case's full name and compares the result with the
// expected name parts. Entries without a full name are skipped.
add_task(function* test_splitName() {
  for (let {description, fullName, nameParts} of TESTCASES) {
    if (!fullName) {
      continue;
    }
    do_print("Starting testcase: " + description);
    let actualParts = FormAutofillNameUtils.splitName(fullName);
    Assert.deepEqual(actualParts, nameParts);
  }
});
|
||||
|
||||
// Joins every test case's name parts and compares the result with
// |expectedFullName| when the case provides one, otherwise with |fullName|.
add_task(function* test_joinName() {
  for (let {description, fullName, nameParts, expectedFullName} of TESTCASES) {
    do_print("Starting testcase: " + description);
    let joined = FormAutofillNameUtils.joinNameParts(nameParts);
    do_check_eq(joined, expectedFullName || fullName);
  }
});
|
|
@ -8,9 +8,10 @@ support-files =
|
|||
[test_enabledStatus.js]
|
||||
[test_findLabelElements.js]
|
||||
[test_getFormInputDetails.js]
|
||||
[test_isCJKName.js]
|
||||
[test_markAsAutofillField.js]
|
||||
[test_nameUtils.js]
|
||||
[test_onFormSubmitted.js]
|
||||
[test_profileAutocompleteResult.js]
|
||||
[test_profileStorage.js]
|
||||
[test_savedFieldNames.js]
|
||||
|
||||
|
|
|
@ -2724,6 +2724,8 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||
|
||||
<p>This license applies to parts of the code in:</p>
|
||||
<ul>
|
||||
<li><span class="path">browser/extensions/formautofill/content/nameReferences.js</span></li>
|
||||
<li><span class="path">browser/extensions/formautofill/FormAutofillNameUtils.jsm</span></li>
|
||||
<li><span class="path">browser/extensions/mortar/host/common/opengles2-utils.jsm</span></li>
|
||||
<li><span class="path">editor/libeditor/EditorEventListener.cpp</span></li>
|
||||
<li><span class="path">security/sandbox/</span></li>
|
||||
|
|
Загрузка…
Ссылка в новой задаче