Import InPageTranslation updates (#402)
* Import InPageTranslation updates from github.com/jelmervdl/firefox-translations * Try to make the linter happy * Add support for translating text nodes This seems to be necessary for supporting YouTube. It also seems to fix the issue in #385, but I still see some interaction between the WordPress plugin and this code that should not be happening. * Decrease translation message batch size The changes to InPageTranslation result in at least 50% fewer messages per page. To make the immediately visible elements render much quicker, I reduced it further to reduce the delay. * Reuse text nodes from page Reuse text nodes already existing in the page if possible. This fixes issues with React pages that tend to re-add text nodes if they have been removed, showing double or untranslated text. Fixes #405. * Improve text deletion from erroneously cloned inline elements It worked in one direction, now it works in both. Still, this scenario should almost never happen. # Conflicts: # extension/view/js/InPageTranslation.js * Let's treat `<a>` like `<span>` # Conflicts: # extension/view/js/InPageTranslation.js * Warn about text nodes that become empty after translation # Conflicts: # extension/view/js/InPageTranslation.js * Fix `removeTextNodes` call * Make linter happy
This commit is contained in:
Родитель
0fb7d9e472
Коммит
6ab1fe6e04
|
@ -10,7 +10,7 @@ class Translation {
|
|||
constructor (mediator){
|
||||
this.translationsMessagesCounter = 0;
|
||||
this.TRANSLATION_INTERVAL = 100; // ms
|
||||
this.MAX_TRANSLATION_MSGS = 100; // max translations to process per batch we should utilize here the max throughput per cpu type
|
||||
this.MAX_TRANSLATION_MSGS = 25; // max translations to process per batch we should utilize here the max throughput per cpu type
|
||||
this.translateSchedule = null; // holds a reference to the translation setTimeout
|
||||
this.translationMessageBuffer = new Queue();
|
||||
this.mediator = mediator;
|
||||
|
@ -170,7 +170,8 @@ class Translation {
|
|||
pageLanguage,
|
||||
attrId,
|
||||
withOutboundTranslation,
|
||||
withQualityEstimation
|
||||
withQualityEstimation,
|
||||
isHTML
|
||||
) {
|
||||
|
||||
/*
|
||||
|
@ -182,7 +183,7 @@ class Translation {
|
|||
translationMessage.messageID = this.translationsMessagesCounter;
|
||||
translationMessage.sourceParagraph = sourceParagraph;
|
||||
// let's revisit this later, since passing false here when there's plain text is breaking
|
||||
translationMessage.isHTML = true; // this.htmlRegex.test(sourceParagraph);
|
||||
translationMessage.isHTML = isHTML;
|
||||
switch (type) {
|
||||
case "outbound":
|
||||
translationMessage.sourceLanguage = navigatorLanguage;
|
||||
|
|
|
@ -256,7 +256,8 @@ class Mediator {
|
|||
this.languageDetection.pageLanguage,
|
||||
message.payload.attrId,
|
||||
message.payload.withOutboundTranslation,
|
||||
message.payload.withQualityEstimation
|
||||
message.payload.withQualityEstimation,
|
||||
message.payload.isHTML
|
||||
);
|
||||
this.translation.translate(translationMessage);
|
||||
// console.log("new translation message sent:", translationMessage, "msg sender lookuptable size:", this.messagesSenderLookupTable.size);
|
||||
|
@ -324,6 +325,8 @@ class Mediator {
|
|||
if (!this.inPageTranslation.started) {
|
||||
this.inPageTranslation.withOutboundTranslation = message.withOutboundTranslation;
|
||||
this.inPageTranslation.withQualityEstimation = message.withQualityEstimation;
|
||||
if (!message.withQualityEstimation) this.inPageTranslation.addElement(document.getElementsByTagName("title")[0]);
|
||||
this.inPageTranslation.addElement(document.body);
|
||||
this.inPageTranslation.start(this.languageDetection.pageLanguage);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -17,14 +17,46 @@ class InPageTranslation {
|
|||
this.mediator = mediator;
|
||||
this.started = false;
|
||||
this.language = null;
|
||||
this.viewportNodeMap = new Map();
|
||||
this.hiddenNodeMap = new Map();
|
||||
this.nonviewportNodeMap = new Map();
|
||||
this.updateMap = new Map();
|
||||
|
||||
/* timeout between First Translation Received -> Update DOM With Translations. */
|
||||
this.updateTimeout = null;
|
||||
this.UI_UPDATE_INTERVAL = 500;
|
||||
this.messagesSent = new Set();
|
||||
this.nodesSent = new WeakSet();
|
||||
|
||||
/*
|
||||
* table of [Element]:Object to be submitted, and some info about them.
|
||||
* Filled by queueTranslation(), emptied by dispatchTranslation().
|
||||
*/
|
||||
this.queuedNodes = new Map();
|
||||
|
||||
/*
|
||||
* table of [Number]:Element of nodes that have been submitted, and are
|
||||
* waiting for a translation.
|
||||
*/
|
||||
this.pendingTranslations = new Map();
|
||||
|
||||
/*
|
||||
* table of [Element]:Number, inverse of pendingTranslations for easy
|
||||
* cancelling of incoming responses when the node changed after
|
||||
* submission of the request.
|
||||
*/
|
||||
this.submittedNodes = new Map();
|
||||
|
||||
/*
|
||||
* queue with the translation text that they should
|
||||
* be filled with once updateTimeout is reached. Filled by
|
||||
* `queueTranslationResponse()` and emptied by `updateElements()`.
|
||||
*/
|
||||
this.translatedNodes = new Map();
|
||||
|
||||
/*
|
||||
* set of elements that have been translated and should not be submitted
|
||||
* again unless their contents changed.
|
||||
*/
|
||||
this.processedNodes = new WeakSet();
|
||||
|
||||
// all elements we're actively trying to translate.
|
||||
this.targetNodes = new Set();
|
||||
|
||||
this.initialWordsInViewportReported = false;
|
||||
this.withOutboundTranslation = null;
|
||||
this.withQualityEstimation = null;
|
||||
|
@ -42,7 +74,6 @@ class InPageTranslation {
|
|||
// tags that are treated as "meh inline tags just send them to the translator"
|
||||
this.inlineTags = new Set([
|
||||
"abbr",
|
||||
"a",
|
||||
"b",
|
||||
"em",
|
||||
"i",
|
||||
|
@ -69,7 +100,16 @@ class InPageTranslation {
|
|||
"th",
|
||||
"td",
|
||||
"li",
|
||||
"br"
|
||||
"br",
|
||||
]);
|
||||
|
||||
/*
|
||||
* tags that give no hint about the inline-ness of their contents
|
||||
* because of how they are used in modern web development.
|
||||
*/
|
||||
this.genericTags = new Set([
|
||||
"a",
|
||||
"span",
|
||||
]);
|
||||
|
||||
// tags that we do not want to translate
|
||||
|
@ -128,6 +168,7 @@ class InPageTranslation {
|
|||
/*
|
||||
* handled in isExcludedNode
|
||||
* `*[lang]:not([lang|=${language}])`
|
||||
* `*[translate=no]`
|
||||
*/
|
||||
])
|
||||
|
||||
|
@ -136,10 +177,10 @@ class InPageTranslation {
|
|||
for (const mutation of mutationsList) {
|
||||
switch (mutation.type) {
|
||||
case "childList":
|
||||
mutation.addedNodes.forEach(node => this.startTreeWalker(node));
|
||||
mutation.addedNodes.forEach(this.restartTreeWalker.bind(this));
|
||||
break;
|
||||
case "characterData":
|
||||
this.startTreeWalker(mutation.target);
|
||||
this.restartTreeWalker(mutation.target);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -147,7 +188,25 @@ class InPageTranslation {
|
|||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
addElement(node) {
|
||||
// exclude non elements
|
||||
if (!(node instanceof Element)) return;
|
||||
|
||||
// exclude nodes we're already tracking
|
||||
if (this.targetNodes.has(node)) return;
|
||||
|
||||
this.targetNodes.add(node);
|
||||
|
||||
if (this.started) {
|
||||
this.startTreeWalker(node);
|
||||
this.observer.observe(node, {
|
||||
characterData: true,
|
||||
childList: true,
|
||||
subtree: true
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
start(language) {
|
||||
|
@ -165,14 +224,49 @@ class InPageTranslation {
|
|||
// language we expect. If we find elements that do not match, nope out.
|
||||
this.language = language;
|
||||
|
||||
const pageTitle = document.getElementsByTagName("title")[0];
|
||||
if (pageTitle && !this.withQualityEstimation) {
|
||||
this.queueTranslation(pageTitle);
|
||||
}
|
||||
this.startTreeWalker(document.body);
|
||||
/*
|
||||
* pre-construct the excluded node selector. Doing it here since it
|
||||
* needs to know `language`. See `containsExcludedNode()`.
|
||||
*/
|
||||
this.excludedNodeSelector = `[lang]:not([lang|="${this.language}"]),[translate=no],${Array.from(this.excludedTags).join(",")},#OTapp`;
|
||||
|
||||
for (let node of this.targetNodes) this.startTreeWalker(node);
|
||||
|
||||
this.startMutationObserver();
|
||||
}
|
||||
|
||||
/*
|
||||
* stops the InPageTranslation process, stopping observing and regard any
|
||||
* in-flight translation request as lost.
|
||||
*/
|
||||
stop() {
|
||||
if (!this.started) return;
|
||||
|
||||
/*
|
||||
* todo: cancel translation requests? Not really necessary at this level
|
||||
* because stop() is called on disconnect from the background-script,
|
||||
* and that script on its own will cancel translation requests from
|
||||
* pages it is no longer connected to.
|
||||
*/
|
||||
|
||||
this.stopMutationObserver();
|
||||
|
||||
/*
|
||||
* remove all elements for which we haven't received a translation yet
|
||||
* from the 'sent' list.
|
||||
*/
|
||||
this.submittedNodes.clear();
|
||||
|
||||
this.pendingTranslations.forEach(node => {
|
||||
this.processedNodes.delete(node);
|
||||
this.queueTranslation(node);
|
||||
})
|
||||
|
||||
this.pendingTranslations.clear();
|
||||
|
||||
this.started = false;
|
||||
}
|
||||
|
||||
addDebugStylesheet() {
|
||||
const element = document.createElement("style");
|
||||
element.textContent = "";
|
||||
|
@ -229,23 +323,29 @@ class InPageTranslation {
|
|||
});
|
||||
}
|
||||
|
||||
/*
|
||||
* start walking from `root` down through the DOM tree and decide which
|
||||
* elements to enqueue for translation.
|
||||
*/
|
||||
startTreeWalker(root) {
|
||||
|
||||
// we found instances when the root can be null here. so, let's test
|
||||
if (!root) return;
|
||||
|
||||
// if we have a textNode whose parent's parent was excluded we walk through its parent and reject it
|
||||
if (root.nodeType === 3){
|
||||
this.startTreeWalker(root.parentNode);
|
||||
return;
|
||||
/*
|
||||
* if the parent itself is rejected, we don't translate any children.
|
||||
* However, if this is a specifically targeted node, we don't do this
|
||||
* check. Mainly so we can exclude <head>, but include <title>.
|
||||
*/
|
||||
if (!this.targetNodes.has(root)) {
|
||||
for (let parent of this.ancestors(root)) {
|
||||
if (this.validateNode(parent) === NodeFilter.FILTER_REJECT) return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* todo: Bit of added complicated logic to include `root` in the set
|
||||
* bit of added complicated logic to include `root` in the set
|
||||
* of nodes that is being evaluated. Normally TreeWalker will only
|
||||
* look at the descendants.
|
||||
*/
|
||||
switch (this.validateNode(root)) {
|
||||
switch (this.validateNodeForQueue(root)) {
|
||||
// if even the root is already rejected, no need to look further
|
||||
case NodeFilter.FILTER_REJECT:
|
||||
return;
|
||||
|
@ -267,23 +367,52 @@ class InPageTranslation {
|
|||
const nodeIterator = document.createTreeWalker(
|
||||
root,
|
||||
NodeFilter.SHOW_ELEMENT,
|
||||
this.validateNode.bind(this)
|
||||
this.validateNodeForQueue.bind(this)
|
||||
);
|
||||
|
||||
let currentNode;
|
||||
|
||||
// eslint-disable-next-line no-cond-assign
|
||||
while (currentNode = nodeIterator.nextNode()) {
|
||||
this.queueTranslation(currentNode);
|
||||
}
|
||||
} break;
|
||||
|
||||
default:
|
||||
// here because of linter, this point is never reached.
|
||||
break;
|
||||
}
|
||||
|
||||
this.dispatchTranslations();
|
||||
}
|
||||
|
||||
/*
|
||||
* like startTreeWalker, but without the "oh ignore this element if it has
|
||||
* already been submitted" bit. Use this one for submitting changed elements.
|
||||
*/
|
||||
restartTreeWalker(root) {
|
||||
|
||||
/*
|
||||
* remove node from sent map: if it was sent, we don't want it to update
|
||||
* with an old translation once the translation response comes in.
|
||||
*/
|
||||
const id = this.submittedNodes.get(root);
|
||||
if (id) {
|
||||
this.submittedNodes.delete(root);
|
||||
this.pendingTranslations.delete(id);
|
||||
}
|
||||
|
||||
// remove node from processed list: we want to reprocess it.
|
||||
this.processedNodes.delete(root);
|
||||
|
||||
// start submitting it again
|
||||
this.startTreeWalker(root);
|
||||
}
|
||||
|
||||
isElementInViewport(element) {
|
||||
// eslint-disable-next-line no-param-reassign
|
||||
if (element.nodeType === Node.TEXT_NODE) element = element.parentElement;
|
||||
|
||||
const rect = element.getBoundingClientRect();
|
||||
return (
|
||||
rect.top >= 0 &&
|
||||
|
@ -294,19 +423,21 @@ class InPageTranslation {
|
|||
}
|
||||
|
||||
isElementHidden(element) {
|
||||
// eslint-disable-next-line no-param-reassign
|
||||
if (element.nodeType === Node.TEXT_NODE) element = element.parentElement;
|
||||
|
||||
const computedStyle = window.getComputedStyle(element);
|
||||
return computedStyle.display === "none" ||
|
||||
computedStyle.visibility === "hidden" ||
|
||||
element.offsetParent === null;
|
||||
}
|
||||
|
||||
isParentTranslating(node){
|
||||
|
||||
/*
|
||||
* if the parent of the node is already translating we should reject
|
||||
* it since we already sent it to translation
|
||||
* test whether any of the parent nodes are already in the process of being
|
||||
* translated. If the parent of the node is already translating we should
|
||||
* reject it since we already sent it to translation.
|
||||
*/
|
||||
|
||||
isParentQueued(node){
|
||||
// if the immediate parent is the body we just allow it
|
||||
if (node.parentNode === document.body) {
|
||||
return false;
|
||||
|
@ -315,9 +446,8 @@ class InPageTranslation {
|
|||
// let's iterate until we find either the body or if the parent was sent
|
||||
let lastNode = node;
|
||||
while (lastNode.parentNode) {
|
||||
// console.log("isParentTranslating node", node, " isParentTranslating nodeParent ", lastNode.parentNode);
|
||||
if (this.nodesSent.has(lastNode.parentNode)) {
|
||||
return true;
|
||||
if (this.queuedNodes.has(lastNode.parentNode)) {
|
||||
return lastNode.parentNode;
|
||||
}
|
||||
lastNode = lastNode.parentNode;
|
||||
}
|
||||
|
@ -325,19 +455,26 @@ class InPageTranslation {
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* test whether this node should be treated as a wrapper of text, e.g.
|
||||
* a `<p>`, or as a wrapper for block elements, e.g. `<div>`, based on
|
||||
* its contents. The first we submit for translation, the second we try to
|
||||
* split into smaller chunks of HTML for better latency.
|
||||
*/
|
||||
hasInlineContent(node) {
|
||||
if (node.nodeType === Node.TEXT_NODE) return true;
|
||||
|
||||
let inlineElements = 0;
|
||||
let blockElements = 0;
|
||||
|
||||
for (let child of node.childNodes) {
|
||||
switch (child.nodeType) {
|
||||
case 3: // textNode
|
||||
case Node.TEXT_NODE:
|
||||
if (child.textContent.trim().length > 0) inlineElements += 1;
|
||||
break;
|
||||
|
||||
case 1: // element
|
||||
if (this.inlineTags.has(child.nodeName.toLowerCase()) ||
|
||||
(child.nodeName.toLowerCase() === "span" && this.hasInlineContent(child))) inlineElements+=1;
|
||||
case Node.ELEMENT_NODE: // element
|
||||
if (this.inlineTags.has(child.nodeName.toLowerCase())) inlineElements += 1;
|
||||
else if (this.genericTags.has(child.nodeName.toLowerCase()) && this.hasInlineContent(child)) inlineElements += 1;
|
||||
else blockElements += 1;
|
||||
break;
|
||||
default:
|
||||
|
@ -348,11 +485,21 @@ class InPageTranslation {
|
|||
return inlineElements >= blockElements;
|
||||
}
|
||||
|
||||
/*
|
||||
* test whether any of the direct text nodes of this node are non-whitespace
|
||||
* text nodes.
|
||||
*
|
||||
* For example:
|
||||
* - `<p>test</p>`: yes
|
||||
* - `<p> </p>`: no
|
||||
* - `<p><b>test</b></p>`: no
|
||||
*/
|
||||
hasTextNodes(node) {
|
||||
if (node.nodeType !== Node.ELEMENT_NODE) return false;
|
||||
// there is probably a quicker way to do this
|
||||
for (let child of node.childNodes) {
|
||||
switch (child.nodeType) {
|
||||
case 3: // textNode
|
||||
case Node.TEXT_NODE: // textNode
|
||||
if (child.textContent.trim() !== "") return true;
|
||||
break;
|
||||
default:
|
||||
|
@ -363,7 +510,15 @@ class InPageTranslation {
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* test whether this is an element we do not want to translate. These
|
||||
* are things like `<code>`, elements with a different `lang` attribute,
|
||||
* and elements that have a `translate=no` attribute.
|
||||
*/
|
||||
isExcludedNode(node) {
|
||||
// text nodes are never excluded
|
||||
if (node.nodeType === Node.TEXT_NODE) return false;
|
||||
|
||||
// exclude certain elements
|
||||
if (this.excludedTags.has(node.nodeName.toLowerCase())) return true;
|
||||
|
||||
|
@ -373,137 +528,172 @@ class InPageTranslation {
|
|||
*/
|
||||
if (node.lang && node.lang.substr(0,2) !== this.language) return true;
|
||||
|
||||
/*
|
||||
* exclude elements that have an translate=no attribute
|
||||
* (See https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/translate)
|
||||
*/
|
||||
if (node.translate === false || node.getAttribute("translate") === "no") return true;
|
||||
|
||||
// we should explicitly exclude the outbound translations widget
|
||||
if (node.id === "OTapp") return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* like `isExcludedNode` but looks at the full subtree. Used to see whether
|
||||
* we can submit a subtree, or whether we should split it into smaller
|
||||
* branches first to try to exclude more of the non-translatable content.
|
||||
*/
|
||||
containsExcludedNode(node) {
|
||||
return node.nodeType === Node.ELEMENT_NODE && node.querySelector(this.excludedNodeSelector);
|
||||
}
|
||||
|
||||
/*
|
||||
* tODO describe this in terms of the function above, but I assume
|
||||
* using querySelector is faster for now.
|
||||
* used by TreeWalker to determine whether to ACCEPT, REJECT or SKIP a
|
||||
* subtree. Only checks if the element is acceptable. It does not check
|
||||
* whether the element has been translated already, which makes it usable
|
||||
* on parent nodes to validate whether a child node is in a translatable
|
||||
* context.
|
||||
*
|
||||
* Returns:
|
||||
* - FILTER_ACCEPT: this subtree should be a translation request.
|
||||
* - FILTER_SKIP : this node itself should not be a translation request
|
||||
* but subtrees beneath it could be!
|
||||
* - FILTER_REJECT: skip this node and everything beneath it.
|
||||
*/
|
||||
if (node.nodeType === 1) {
|
||||
node.querySelector(`[lang]:not([lang|="${this.language}"]), ${Array.from(this.excludedTags).join(",")}`);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
validateNode(node) {
|
||||
if (node.nodeType === 1 && this.isExcludedNode(node)) {
|
||||
node.setAttribute("x-bergamot-translated", "rejected is-excluded-node");
|
||||
// little helper to add markings to elements for debugging
|
||||
const mark = value => {
|
||||
if (node.nodeType === Node.ELEMENT_NODE) node.setAttribute("x-bergamot-translated", value);
|
||||
};
|
||||
|
||||
/*
|
||||
* don't resubmit subtrees that are already in progress (unless their
|
||||
* contents have been changed
|
||||
*/
|
||||
if (this.queuedNodes.has(node) || this.isParentQueued(node)) {
|
||||
// node.setAttribute("x-bergamot-translated", "rejected is-parent-translating");
|
||||
return NodeFilter.FILTER_REJECT;
|
||||
}
|
||||
|
||||
if (node.nodeType === 1 && node.textContent.trim().length === 0) {
|
||||
node.setAttribute("x-bergamot-translated", "rejected empty-text-content");
|
||||
// exclude nodes that we don't want to translate
|
||||
if (this.isExcludedNode(node)) {
|
||||
mark("rejected is-excluded-node");
|
||||
return NodeFilter.FILTER_REJECT;
|
||||
}
|
||||
|
||||
if (this.isParentTranslating(node)) {
|
||||
// node.setAttribute('x-bergamot-translated', 'rejected is-parent-translating');
|
||||
// skip over subtrees that don't have text
|
||||
if (node.textContent.trim().length === 0) {
|
||||
mark("rejected empty-text-content");
|
||||
return NodeFilter.FILTER_REJECT;
|
||||
}
|
||||
|
||||
if (node.nodeType === 1 && !this.hasInlineContent(node)) {
|
||||
node.setAttribute("x-bergamot-translated", "skipped does-not-have-text-of-its-own");
|
||||
if (!this.hasInlineContent(node)) {
|
||||
mark("skipped does-not-have-text-of-its-own");
|
||||
return NodeFilter.FILTER_SKIP; // otherwise dig deeper
|
||||
}
|
||||
|
||||
if (node.nodeType === 1 && this.containsExcludedNode(node) && !this.hasTextNodes(node)) {
|
||||
node.setAttribute("x-bergamot-translated", "skipped contains-excluded-node");
|
||||
if (this.containsExcludedNode(node) && !this.hasTextNodes(node)) {
|
||||
mark("skipped contains-excluded-node");
|
||||
return NodeFilter.FILTER_SKIP; // otherwise dig deeper
|
||||
}
|
||||
|
||||
return NodeFilter.FILTER_ACCEPT; // send whole node as 1 block
|
||||
}
|
||||
|
||||
queueTranslation(node) {
|
||||
/*
|
||||
* used by TreeWalker to determine whether to ACCEPT, REJECT or SKIP a
|
||||
* subtree. Checks whether element is acceptable, and hasn't been
|
||||
* translated already.
|
||||
*/
|
||||
validateNodeForQueue(node) {
|
||||
// skip nodes already seen (for the partial subtree change, or restart of the whole InPageTranslation process.)
|
||||
if (this.processedNodes.has(node)) return NodeFilter.FILTER_REJECT;
|
||||
|
||||
return this.validateNode(node);
|
||||
}
|
||||
|
||||
/*
|
||||
* let's store the node to keep its reference
|
||||
* and send it to the translation worker
|
||||
* enqueue a node for translation. Called during startTreeWalker. Queues
|
||||
* are emptied by dispatchTranslation().
|
||||
*/
|
||||
queueTranslation(node) {
|
||||
this.translationsCounter += 1;
|
||||
|
||||
// debugging: mark the node so we can add CSS to see them
|
||||
if (node.nodeType === 1) {
|
||||
node.setAttribute("x-bergamot-translated", this.translationsCounter);
|
||||
// let's categorize the elements on their respective hashmaps
|
||||
if (this.isElementHidden(node)) {
|
||||
// if the element is entirely hidden
|
||||
this.hiddenNodeMap.set(this.translationsCounter, node);
|
||||
} else if (this.isElementInViewport(node)) {
|
||||
// if the element is present in the viewport
|
||||
this.viewportNodeMap.set(this.translationsCounter, node);
|
||||
} else {
|
||||
// if the element is visible but not present in the viewport
|
||||
this.nonviewportNodeMap.set(this.translationsCounter, node);
|
||||
}
|
||||
this.nodesSent.add(node);
|
||||
}
|
||||
if (node.nodeType === Node.ELEMENT_NODE) node.setAttribute("x-bergamot-translated", this.translationsCounter);
|
||||
|
||||
let priority = 2;
|
||||
if (this.isElementHidden(node)) priority = 3;
|
||||
else if (this.isElementInViewport(node)) priority = 1;
|
||||
|
||||
this.queuedNodes.set(node, {
|
||||
id: this.translationsCounter,
|
||||
priority
|
||||
});
|
||||
}
|
||||
|
||||
dispatchTranslations() {
|
||||
this.reportWordsInViewport();
|
||||
// we then submit for translation the elements in order of priority
|
||||
this.processingNodeMap = "viewportNodeMap";
|
||||
this.viewportNodeMap.forEach(this.submitTranslation, this);
|
||||
this.processingNodeMap = "nonviewportNodeMap";
|
||||
this.nonviewportNodeMap.forEach(this.submitTranslation, this);
|
||||
this.processingNodeMap = "hiddenNodeMap";
|
||||
this.hiddenNodeMap.forEach(this.submitTranslation, this);
|
||||
|
||||
const queuesPerPriority = [null, [], [], []] // priorities 1 to 3
|
||||
this.queuedNodes.forEach((message, node) => {
|
||||
queuesPerPriority[message.priority].push({ message, node });
|
||||
});
|
||||
|
||||
for (let priority = 1; priority <= 3; priority += 1) {
|
||||
queuesPerPriority[priority].forEach(({ message, node }) => {
|
||||
this.submitTranslation(message, node);
|
||||
});
|
||||
}
|
||||
|
||||
this.queuedNodes.clear();
|
||||
}
|
||||
|
||||
reportWordsInViewport() {
|
||||
if (this.initialWordsInViewportReported || this.viewportNodeMap.size === 0) return;
|
||||
if (this.initialWordsInViewportReported || this.queuedNodes.size === 0) return;
|
||||
|
||||
let viewPortWordsNum = 0;
|
||||
for (const [, value] of this.viewportNodeMap.entries()) {
|
||||
for (const [message, value] of this.queuedNodes.entries()) {
|
||||
if (message.priority === 3) {
|
||||
viewPortWordsNum += value.textContent.trim().split(/\s+/).length;
|
||||
}
|
||||
}
|
||||
|
||||
this.notifyMediator("reportViewPortWordsNum", viewPortWordsNum);
|
||||
// report words in viewport only for initially loaded content
|
||||
this.initialWordsInViewportReported = true;
|
||||
}
|
||||
|
||||
submitTranslation(node, key) {
|
||||
if (this.messagesSent.has(key)) {
|
||||
// if we already sent this message, we just skip it
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* give each element an id that gets passed through the translation so
|
||||
* we can later on reunite it.
|
||||
*/
|
||||
submitTranslation({ id }, node) {
|
||||
// give each element an id that gets passed through the translation so we can later on reunite it.
|
||||
if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
node.querySelectorAll("*").forEach((el, i) => {
|
||||
el.dataset.xBergamotId = i;
|
||||
});
|
||||
}
|
||||
|
||||
const text = node.innerHTML;
|
||||
if (text.trim().length) {
|
||||
const text = node.nodeType === Node.ELEMENT_NODE
|
||||
? node.innerHTML
|
||||
: node.textContent;
|
||||
if (text.trim().length === 0) return;
|
||||
|
||||
/*
|
||||
* send the content back to mediator in order to have the translation
|
||||
* requested by it
|
||||
*/
|
||||
const payload = {
|
||||
this.notifyMediator("translate", {
|
||||
text,
|
||||
isHTML: node.nodeType === Node.ELEMENT_NODE,
|
||||
type: "inpage",
|
||||
withOutboundTranslation: this.withOutboundTranslation,
|
||||
withQualityEstimation: this.withQualityEstimation,
|
||||
attrId: [
|
||||
this.processingNodeMap,
|
||||
key
|
||||
],
|
||||
};
|
||||
this.notifyMediator("translate", payload);
|
||||
this.messagesSent.add(key);
|
||||
}
|
||||
attrId: [id],
|
||||
});
|
||||
|
||||
// keep reference to this node for once we receive a translation response.
|
||||
this.pendingTranslations.set(id, node);
|
||||
this.submittedNodes.set(node, id);
|
||||
|
||||
// also mark this node as not to be translated again unless the contents are changed (which the observer will pick up on)
|
||||
this.processedNodes.add(node);
|
||||
}
|
||||
|
||||
notifyMediator(command, payload) {
|
||||
|
@ -511,13 +701,14 @@ class InPageTranslation {
|
|||
}
|
||||
|
||||
startMutationObserver() {
|
||||
|
||||
this.observer.observe(document, {
|
||||
for (let node of this.targetNodes) {
|
||||
this.observer.observe(node, {
|
||||
characterData: true,
|
||||
childList: true,
|
||||
subtree: true
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
stopMutationObserver() {
|
||||
this.observer.disconnect();
|
||||
|
@ -535,7 +726,7 @@ class InPageTranslation {
|
|||
}
|
||||
|
||||
updateElements() {
|
||||
const updateElement = (translatedHTML, node) => {
|
||||
const updateElement = ({ translatedHTML }, node) => {
|
||||
// console.groupCollapsed(computePath(node));
|
||||
node.setAttribute("x-bergamot-translated", "");
|
||||
|
||||
|
@ -557,33 +748,6 @@ class InPageTranslation {
|
|||
|
||||
const clonedNodes = new Set();
|
||||
|
||||
const removeTextNodes = node => {
|
||||
Array.from(node.childNodes).forEach(child => {
|
||||
switch (child.nodeType) {
|
||||
case Node.TEXT_NODE:
|
||||
node.removeChild(child);
|
||||
break;
|
||||
case Node.ELEMENT_NODE:
|
||||
removeTextNodes(child);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
// check (recursively) if a given node and all its children have only QE specific attributes
|
||||
const hasOnlyQEAttributes = node => {
|
||||
if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
if (node.nodeName.toUpperCase() !== "FONT") return false;
|
||||
|
||||
if (!node.getAttributeNames().every(attribute => this.qeAttributes.has(attribute))) return false;
|
||||
|
||||
for (let child of node.children) if (!hasOnlyQEAttributes(child)) return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
/*
|
||||
* merge the live tree (dst) with the translated tree (src) by
|
||||
* re-using elements from the live tree.
|
||||
|
@ -595,13 +759,16 @@ class InPageTranslation {
|
|||
* an (indexed) reference to them since we will be adding them
|
||||
* back, but possibly in a different order.
|
||||
*/
|
||||
const dstChildNodes = Object.fromEntries(Array.from(dst.childNodes)
|
||||
.map(child => dst.removeChild(child))
|
||||
const nodes = Array.from(dst.childNodes).map(child => dst.removeChild(child));
|
||||
|
||||
const dstChildNodes = Object.fromEntries(nodes
|
||||
.filter(child => child.nodeType === Node.ELEMENT_NODE)
|
||||
.map(child => [child.dataset.xBergamotId, child]));
|
||||
|
||||
const dstTextNodes = nodes.filter(child => child.nodeType === Node.TEXT_NODE);
|
||||
|
||||
// src (translated) dictates the order.
|
||||
Array.from(src.childNodes).forEach(child => {
|
||||
Array.from(src.childNodes).forEach((child, index, siblings) => {
|
||||
// element nodes we try to use the already existing DOM nodes
|
||||
if (child.nodeType === Node.ELEMENT_NODE) {
|
||||
|
||||
|
@ -619,7 +786,7 @@ class InPageTranslation {
|
|||
* when QE is on) then just add the translated element child to the live
|
||||
* element node.
|
||||
*/
|
||||
if (!child.hasAttribute("data-x-bergamot-id") && hasOnlyQEAttributes(child)) {
|
||||
if (!child.hasAttribute("data-x-bergamot-id") && this.hasOnlyQEAttributes(child)) {
|
||||
dst.appendChild(child);
|
||||
} else {
|
||||
console.warn(`[InPlaceTranslation] ${this.computePath(child, scratch)} Could not find counterpart for`, child.dataset.xBergamotId, dstChildNodes, child);
|
||||
|
@ -650,7 +817,20 @@ class InPageTranslation {
|
|||
* the one that came out of translation doesn't?
|
||||
*/
|
||||
console.warn(`[InPlaceTranslation] ${this.computePath(child, scratch)} Child ${child.outerHTML} has no text but counterpart ${counterpart.outerHTML} does`);
|
||||
removeTextNodes(counterpart); // this should not be necessary
|
||||
|
||||
/*
|
||||
* todo: This scenario might be caused by one of two
|
||||
* causes: 1) element was duplicated by translation
|
||||
* but then not given text content. This happens on
|
||||
* Wikipedia articles for example.
|
||||
* Or 2) the translator messed up and could not
|
||||
* translate the text. This happens on Youtube in the
|
||||
* language selector. In that case, having the original
|
||||
* text is much better than no text at all.
|
||||
* To make sure it is this case, and not option 2
|
||||
* we check whether this is the only occurrence.
|
||||
*/
|
||||
if (siblings.some((sibling, i) => sibling.nodeType === Node.ELEMENT_NODE && index !== i && child.dataset.xBergamotId === sibling.dataset.xBergamotId)) this.removeTextNodes(counterpart);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -658,9 +838,13 @@ class InPageTranslation {
|
|||
* it has been synced with the translated text and order.
|
||||
*/
|
||||
dst.appendChild(counterpart);
|
||||
} else {
|
||||
// all other node types we just copy in directly
|
||||
dst.appendChild(child);
|
||||
} else if (child.nodeType === Node.TEXT_NODE) {
|
||||
let counterpart = dstTextNodes.shift();
|
||||
|
||||
if (typeof counterpart !== "undefined") counterpart.data = child.data;
|
||||
else counterpart = child;
|
||||
|
||||
dst.appendChild(counterpart);
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -679,31 +863,58 @@ class InPageTranslation {
|
|||
|
||||
merge(node, scratch);
|
||||
|
||||
return node;
|
||||
};
|
||||
|
||||
/**
 * Writes a finished translation back into a text node.
 *
 * @param {{translatedHTML: string}} message - translation result; only
 *   `translatedHTML` is read.
 * @param {Node} node - the text node the translation belongs to.
 * @returns {Node} `node` itself, or the DocumentFragment that was swapped in
 *   for it when quality-estimation markup was inserted.
 */
const updateTextNode = ({ translatedHTML }, node) => {
    /*
     * regardless of withQualityEstimation, if translatedHTML is empty
     * we have an empty string as output. Which is useless.
     */
    if (translatedHTML.trim().length === 0) {
        console.warn("[InPlaceTranslation] text node", node, "translated to <empty string>");
        return node;
    }

    /*
     * when we're getting quality estimations back, translatedHTML is
     * indeed actual HTML with font tags containing that info.
     */
    if (this.withQualityEstimation) {
        const parent = node.parentNode;

        // The node may have been detached from the page while its
        // translation was pending; there is nothing to replace then.
        if (!parent) {
            console.warn("[InPlaceTranslation] text node", node, "has no parent, skipping update");
            return node;
        }

        const nonLiveDomContainer = document.createElement("template");
        nonLiveDomContainer.innerHTML = translatedHTML;

        const fragment = document.createDocumentFragment();

        // childNodes is a live list and appendChild() moves nodes out of
        // it, so iterate over a snapshot to avoid skipping siblings.
        for (const child of Array.from(nonLiveDomContainer.content.childNodes)) {
            if (this.hasOnlyQEAttributes(child)) {
                fragment.appendChild(child);
            }
        }

        // NOTE(review): inserting a fragment moves its children into the
        // page, so the fragment returned below is empty by then. Confirm
        // that callers consuming the return value expect that.
        parent.replaceChild(fragment, node);
        return fragment;
    }

    node.textContent = translatedHTML;
    return node;
};
|
||||
|
||||
// pause observing mutations
|
||||
this.stopMutationObserver();
|
||||
|
||||
try {
|
||||
this.updateMap.forEach(updateElement);
|
||||
// let's test this here to prevent unnecessary traversals
|
||||
if (this.withQualityEstimation) this.reportQualityEstimation(this.updateMap.keys());
|
||||
this.updateMap.clear();
|
||||
const touchedNodes = Array.from(this.translatedNodes, ([node, message]) => {
|
||||
switch (node.nodeType) {
|
||||
case Node.TEXT_NODE:
|
||||
return updateTextNode(message, node);
|
||||
case Node.ELEMENT_NODE:
|
||||
return updateElement(message, node);
|
||||
default:
|
||||
return node; // never happens
|
||||
}
|
||||
});
|
||||
if (this.withQualityEstimation) this.reportQualityEstimation(touchedNodes);
|
||||
this.translatedNodes.clear();
|
||||
this.updateTimeout = null;
|
||||
if (this.withQualityEstimation) this.addQualityClasses();
|
||||
} finally {
|
||||
|
@ -736,35 +947,29 @@ class InPageTranslation {
|
|||
}
|
||||
|
||||
enqueueElement(translationMessage) {
|
||||
const [
|
||||
hashMapName,
|
||||
idCounter
|
||||
] = translationMessage.attrId;
|
||||
const translatedText = translationMessage.translatedParagraph;
|
||||
// console.log("no enqueue", translatedText);
|
||||
let targetNode = null;
|
||||
switch (hashMapName) {
|
||||
case "hiddenNodeMap":
|
||||
targetNode = this.hiddenNodeMap.get(idCounter);
|
||||
this.hiddenNodeMap.delete(idCounter);
|
||||
break;
|
||||
case "viewportNodeMap":
|
||||
targetNode = this.viewportNodeMap.get(idCounter);
|
||||
this.viewportNodeMap.delete(idCounter);
|
||||
break;
|
||||
case "nonviewportNodeMap":
|
||||
targetNode = this.nonviewportNodeMap.get(idCounter);
|
||||
this.nonviewportNodeMap.delete(idCounter);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
this.messagesSent.delete(idCounter);
|
||||
this.updateMap.set(targetNode, translatedText);
|
||||
// we finally schedule the UI update
|
||||
if (!this.updateTimeout) {
|
||||
this.updateTimeout = setTimeout(this.updateElements.bind(this),this.UI_UPDATE_INTERVAL);
|
||||
const [id] = translationMessage.attrId;
|
||||
const translatedHTML = translationMessage.translatedParagraph;
|
||||
|
||||
// look up node by message id. This can fail
|
||||
const node = this.pendingTranslations.get(id);
|
||||
if (typeof node === "undefined") {
|
||||
console.debug("[in-page-translation] Message",id,"is not found in pendingTranslations");
|
||||
return;
|
||||
}
|
||||
|
||||
// prune it.
|
||||
this.pendingTranslations.delete(id);
|
||||
|
||||
// node still exists! Remove node -> (pending) message mapping
|
||||
this.submittedNodes.delete(node);
|
||||
|
||||
// queue node to be populated with translation next update.
|
||||
this.translatedNodes.set(node, { id, translatedHTML });
|
||||
|
||||
// we schedule the UI update
|
||||
if (!this.updateTimeout) this.updateTimeout = setTimeout(this.updateElements.bind(this), this.submittedNodes.size === 0
|
||||
? 0
|
||||
: this.UI_UPDATE_INTERVAL);
|
||||
}
|
||||
|
||||
computePath(node, root) {
|
||||
|
@ -778,4 +983,37 @@ class InPageTranslation {
|
|||
else if (node.className) path += `.${Array.from(node.classList).join(".")}`;
|
||||
return path;
|
||||
}
|
||||
|
||||
*ancestors(node) {
|
||||
for (let parent = node.parentNode; parent && parent !== document.documentElement; parent = parent.parentNode) yield parent;
|
||||
}
|
||||
|
||||
/*
|
||||
* check (recursively) if a given node and all its children have only QE specific attributes
|
||||
*/
|
||||
hasOnlyQEAttributes(node) {
|
||||
if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
if (node.nodeName.toUpperCase() !== "FONT") return false;
|
||||
|
||||
if (!node.getAttributeNames().every(attribute => this.qeAttributes.has(attribute))) return false;
|
||||
|
||||
for (let child of node.children) if (!this.hasOnlyQEAttributes(child)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
removeTextNodes(node) {
|
||||
Array.from(node.childNodes).forEach(child => {
|
||||
switch (child.nodeType) {
|
||||
case Node.TEXT_NODE:
|
||||
node.removeChild(child);
|
||||
break;
|
||||
case Node.ELEMENT_NODE:
|
||||
this.removeTextNodes(child);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
Загрузка…
Ссылка в новой задаче