Bug 1898853 - Add function to convert html snippets to plaintext. r=mkmelin

This adds the function to convert HTML snippets to plain text. It was not
added as an option of `messages.listTextParts()`, because it can be used
in other scenarios as well. Introducing it as a stand-alone function
without being bound to a specific message increases its usefulness.

Differential Revision: https://phabricator.services.mozilla.com/D211001

--HG--
extra : moz-landing-system : lando
This commit is contained in:
John Bieling 2024-05-25 10:27:37 +00:00
Родитель 02f296fa63
Коммит 343706f6a7
3 изменённых файлов: 123 добавлений и 0 удалений

Просмотреть файл

@ -6,6 +6,10 @@ var { MailServices } = ChromeUtils.importESModule(
"resource:///modules/MailServices.sys.mjs"
);
// Parser utils service (nsIParserUtils). Its convertToPlainText() method is
// used by messengerUtilities.convertToPlainText() below.
var parserUtils = Cc["@mozilla.org/parserutils;1"].getService(
Ci.nsIParserUtils
);
this.messengerUtilities = class extends ExtensionAPIPersistent {
getAPI() {
const messenger = Cc["@mozilla.org/messenger;1"].createInstance(
@ -25,6 +29,21 @@ this.messengerUtilities = class extends ExtensionAPIPersistent {
email: hdr.email || undefined,
}));
},
async convertToPlainText(body, options) {
let wrapWidth = 0;
let flags =
Ci.nsIDocumentEncoder.OutputLFLineBreak |
Ci.nsIDocumentEncoder.OutputDisallowLineBreaking;
if (options?.flowed) {
wrapWidth = 72;
flags |=
Ci.nsIDocumentEncoder.OutputWrap |
Ci.nsIDocumentEncoder.OutputFormatFlowed;
}
return parserUtils.convertToPlainText(body, flags, wrapWidth).trim();
},
},
};
}

Просмотреть файл

@ -29,6 +29,41 @@
}
],
"functions": [
{
"name": "convertToPlainText",
"type": "function",
"description": "Converts the provided body to readable plain text, without tags and leading/trailing whitespace.",
"async": "callback",
"parameters": [
{
"name": "body",
"type": "string",
"description": "The to-be-converted body."
},
{
"name": "options",
"type": "object",
"optional": true,
"properties": {
"flowed": {
"type": "boolean",
"optional": true,
"description": "The converted plain text will be wrapped to lines not longer than 72 characters and use format flowed, as defined by RFC 3676 (which obsoletes RFC 2646)."
}
}
},
{
"type": "function",
"name": "callback",
"optional": true,
"parameters": [
{
"type": "string"
}
]
}
]
},
{
"name": "formatFileSize",
"type": "function",

Просмотреть файл

@ -174,3 +174,72 @@ add_task(async function test_parseMailboxString() {
await extension.awaitFinish("finished");
await extension.unload();
});
// Checks that browser.messengerUtilities.convertToPlainText() returns the
// expected plain text for a set of HTML bodies, with and without the `flowed`
// option.
add_task(async function test_convertToPlainText() {
const extension = ExtensionTestUtils.loadExtension({
files: {
"background.js": async () => {
// Long single-line filler text, used to build bodies with lines well beyond
// 72 characters.
const lorem =
"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.";
// Each test case consists of the HTML `body`, the optional `options` passed
// to the API and the expected result in `expectedPlain`.
const tests = [
// Short body without options: the expected text has no tags, has line
// breaks in place of the <br> elements and no leading/trailing whitespace.
{
body: "\r\n<html><body><p>This is <b>some</b> html content,<br><br>Good night!<br></p>\r\n</body></html>",
expectedPlain: "This is some html content,\n\nGood night!",
},
// Long body without options: the long lines are expected to be returned
// unwrapped.
{
body: `\r\n<html><body><p>This is <b>random</b> html content,<br>${lorem}<br>${lorem}</p></body></html>`,
expectedPlain: `This is random html content,\n${lorem}\n${lorem}`,
},
// Long body with the `flowed` option.
{
body: `\r\n<html><body><p>This is <i>flowed</i> html content,<br>${lorem}<br>${lorem}</p></body></html>`,
options: { flowed: true },
// Flowed output is wrapping lines to 72 chars length. The enforced
// line breaks have a trailing space, allowing the client to reflow
// the text and only honor the line breaks added by the user.
expectedPlain: `This is flowed html content,
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit
amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam
nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua. At vero eos et accusam et justo duo dolores et ea
rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem
ipsum dolor sit amet.
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit
amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam
nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua. At vero eos et accusam et justo duo dolores et ea
rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem
ipsum dolor sit amet.`,
},
];
// Run each test case through the API and compare the returned text with the
// expected text. The index is included in the assertion message.
for (let i = 0; i < tests.length; i++) {
const { body, options, expectedPlain } = tests[i];
const actual = await browser.messengerUtilities.convertToPlainText(
body,
options
);
browser.test.assertEq(
expectedPlain,
actual,
`Converted plain text for test #${i} should be correct`
);
}
// Report completion; the outer task waits for this "finished" message.
browser.test.notifyPass("finished");
},
},
manifest: {
manifest_version: 2,
background: { scripts: ["background.js"] },
},
});
await extension.startup();
await extension.awaitFinish("finished");
await extension.unload();
});