Bug 1760728 - Support importing non-UTF8 csv file to address book. r=benc

Introduce MailStringUtils.jsm, which reads a file, detects its charset, and converts the content to a DOMString.

Differential Revision: https://phabricator.services.mozilla.com/D141715

--HG--
extra : amend_source : 4b3172060dc4fb8070b6d227d1accf4d8a261bfa
This commit is contained in:
Ping Chen 2022-03-23 12:10:03 +02:00
Родитель fcabc6e10a
Коммит 9bcc0f011f
6 изменённых файлов: 108 добавлений и 38 удалений

Просмотреть файл

@ -0,0 +1,78 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
const EXPORTED_SYMBOLS = ["MailStringUtils"];
var MailStringUtils = {
  /**
   * Build a Uint8Array with one element per character of a ByteString.
   * @param {ByteString} str - The input string.
   * @returns {Uint8Array} An array as long as `str` whose element i is the
   *   char code of the character at index i.
   */
  byteStringToUint8Array(str) {
    return Uint8Array.from({ length: str.length }, (_, index) =>
      str.charCodeAt(index)
    );
  },

  /**
   * Build a ByteString with one character per element of a Uint8Array.
   * @param {Uint8Array} arr - The input Uint8Array.
   * @returns {ByteString} A string whose character i has char code arr[i].
   */
  uint8ArrayToByteString(arr) {
    // The array is converted in slices of this many elements, so a single
    // String.fromCharCode call never receives more arguments than that.
    const chunkSize = 65536;
    const pieces = [];
    for (let start = 0; start < arr.length; start += chunkSize) {
      const slice = arr.subarray(start, start + chunkSize);
      pieces.push(String.fromCharCode.apply(null, slice));
    }
    return pieces.join("");
  },

  /**
   * Work out the text encoding of a ByteString. A leading byte order mark
   * decides the answer; otherwise the nsIMsgCompUtils detector is asked.
   * @param {ByteString} str - The input string.
   * @returns {string} The charset name.
   */
  detectCharset(str) {
    // Known byte order marks, checked in this order.
    const signatures = [
      ["UTF-16BE", [0xfe, 0xff]],
      ["UTF-16LE", [0xff, 0xfe]],
      ["UTF-8", [0xef, 0xbb, 0xbf]],
    ];
    if (str.length >= 2) {
      for (const [name, bom] of signatures) {
        if (bom.every((byte, index) => str.charCodeAt(index) === byte)) {
          return name;
        }
      }
    }

    // No BOM: use mozilla::EncodingDetector.
    const detector = Cc[
      "@mozilla.org/messengercompose/computils;1"
    ].createInstance(Ci.nsIMsgCompUtils);
    return detector.detectCharset(str);
  },

  /**
   * Read a file, detect its charset and return the content decoded with that
   * charset. If you're absolutely sure the file is UTF-8 encoded, use
   * IOUtils.readUTF8 instead.
   * @param {string} path - An absolute file path.
   * @returns {DOMString} The file content.
   */
  async readEncoded(path) {
    const bytes = await IOUtils.read(path);
    // Detection works on a ByteString; decoding uses the original bytes.
    const charset = this.detectCharset(this.uint8ArrayToByteString(bytes));
    return new TextDecoder(charset).decode(bytes);
  },
};

Просмотреть файл

@ -125,6 +125,7 @@ EXTRA_JS_MODULES += [
"MailNotificationService.jsm",
"MailServices.jsm",
"mailstoreConverter.jsm",
"MailStringUtils.jsm",
"MsgAsyncPrompter.jsm",
"MsgDBCacheManager.jsm",
"MsgIncomingServer.jsm",

Просмотреть файл

@ -700,42 +700,6 @@ var MsgUtils = {
return value;
},
/**
* Pick a charset according to content type and content.
* @param {string} contentType - The content type.
* @param {string} content - The content.
* @returns {string}
*/
pickCharset(contentType, content) {
if (!contentType.startsWith("text")) {
return "";
}
// Check the BOM.
let charset = "";
if (content.length >= 2) {
let byte0 = content.charCodeAt(0);
let byte1 = content.charCodeAt(1);
let byte2 = content.charCodeAt(2);
if (byte0 == 0xfe && byte1 == 0xff) {
charset = "UTF-16BE";
} else if (byte0 == 0xff && byte1 == 0xfe) {
charset = "UTF-16LE";
} else if (byte0 == 0xef && byte1 == 0xbb && byte2 == 0xbf) {
charset = "UTF-8";
}
}
if (charset) {
return charset;
}
// Use mozilla::EncodingDetector.
let compUtils = Cc[
"@mozilla.org/messengercompose/computils;1"
].createInstance(Ci.nsIMsgCompUtils);
return compUtils.detectCharset(content);
},
/**
* Given a string, convert it to 'qtext' (quoted text) for RFC822 header
* purposes.

Просмотреть файл

@ -10,6 +10,9 @@ let { MimeEncoder } = ChromeUtils.import("resource:///modules/MimeEncoder.jsm");
let { MsgUtils } = ChromeUtils.import(
"resource:///modules/MimeMessageUtils.jsm"
);
var { MailStringUtils } = ChromeUtils.import(
"resource:///modules/MailStringUtils.jsm"
);
var { NetUtil } = ChromeUtils.import("resource://gre/modules/NetUtil.jsm");
/**
@ -303,7 +306,9 @@ class MimePart {
this._bodyAttachment.name
);
}
this._charset = MsgUtils.pickCharset(this._contentType, content);
this._charset = this._contentType
? MailStringUtils.detectCharset(content)
: "";
let contentTypeParams = "";
if (this._charset) {

Просмотреть файл

@ -12,6 +12,7 @@ XPCOMUtils.defineLazyModuleGetters(this, {
Services: "resource://gre/modules/Services.jsm",
setTimeout: "resource://gre/modules/Timer.jsm",
MailServices: "resource:///modules/MailServices.jsm",
MailStringUtils: "resource:///modules/MailStringUtils.jsm",
exportAttributes: "resource:///modules/AddrBookUtils.jsm",
});
@ -94,7 +95,7 @@ class AddrBookFileImporter {
* @returns {string[][]}
*/
async parseCsvFile(sourceFile) {
let content = await IOUtils.readUTF8(sourceFile.path);
let content = await MailStringUtils.readEncoded(sourceFile.path);
let csvRows = d3.csv.parseRows(content);
let tsvRows = d3.tsv.parseRows(content);
@ -125,6 +126,15 @@ class AddrBookFileImporter {
this._csvProperties = [];
// Get the nsIAbCard properties corresponding to the user supplied file.
for (let field of this._csvRows[0]) {
if (
!field &&
this._csvRows[0].length > 1 &&
field == this._csvRows[0].at(-1)
) {
// This is the last field and empty, caused by a trailing comma, which
// is OK.
return [];
}
let index = supportedFieldNames.indexOf(field.toLowerCase());
if (index == -1) {
return this._csvRows;

Просмотреть файл

@ -74,6 +74,18 @@ add_task(async function test_importCsvFile() {
// A comma separated file with some fields containing quotes.
await test_importAbFile("csv", "resources/quote.csv", "quote_csv");
// Non-UTF8 csv file.
await test_importAbFile(
"csv",
"resources/shiftjis_addressbook.csv",
"shiftjis_csv"
);
await test_importAbFile(
"csv",
"resources/utf16_addressbook.csv",
"utf16_csv"
);
});
/** Test importing .vcf file works. */