Import InPageTranslation updates (#402)

* Import InPageTranslation updates from github.com/jelmervdl/firefox-translations

* Try to make the linter happy

* Add support for translating text nodes

This seems to be necessary for supporting Youtube. It also seems to fix the issue in #385, but I still see some interaction between the Wordpress plugin and this code that should not be happening.

* Decrease translation message batch size

The changes to InPageTranslation result in at least 50% less messages per page. To make the immediately visible elements render much quicker, I reduced it further to reduce the delay.

* Reuse text nodes from page

Reuse text nodes already existing in the page if possible. This fixes issues with React pages that tend to re-add text nodes if they have been removed, showing double or untranslated text. Fixes #405.

* Improve text deletion from errornously cloned inline elements

It worked in one direction, now it works in both. Still, this scenario should almost never happen.

# Conflicts:
#	extension/view/js/InPageTranslation.js

* Let's treat `<a>` like `<span>`

# Conflicts:
#	extension/view/js/InPageTranslation.js

* Warn about text nodes that become empty after translation

# Conflicts:
#	extension/view/js/InPageTranslation.js

* Fix `removeTextNodes` call

* Make linter happy
This commit is contained in:
Jelmer 2022-07-06 22:25:09 +01:00 коммит произвёл GitHub
Родитель 0fb7d9e472
Коммит 6ab1fe6e04
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 450 добавлений и 208 удалений

Просмотреть файл

@ -10,7 +10,7 @@ class Translation {
constructor (mediator){
this.translationsMessagesCounter = 0;
this.TRANSLATION_INTERVAL = 100; // ms
this.MAX_TRANSLATION_MSGS = 100; // max translations to process per batch we should utilize here the max throughput per cpu type
this.MAX_TRANSLATION_MSGS = 25; // max translations to process per batch we should utilize here the max throughput per cpu type
this.translateSchedule = null; // holds a reference to the translation setTimeout
this.translationMessageBuffer = new Queue();
this.mediator = mediator;
@ -170,7 +170,8 @@ class Translation {
pageLanguage,
attrId,
withOutboundTranslation,
withQualityEstimation
withQualityEstimation,
isHTML
) {
/*
@ -182,7 +183,7 @@ class Translation {
translationMessage.messageID = this.translationsMessagesCounter;
translationMessage.sourceParagraph = sourceParagraph;
// let's revisit this later, since passing false here when there's plain text is breaking
translationMessage.isHTML = true; // this.htmlRegex.test(sourceParagraph);
translationMessage.isHTML = isHTML;
switch (type) {
case "outbound":
translationMessage.sourceLanguage = navigatorLanguage;

Просмотреть файл

@ -256,7 +256,8 @@ class Mediator {
this.languageDetection.pageLanguage,
message.payload.attrId,
message.payload.withOutboundTranslation,
message.payload.withQualityEstimation
message.payload.withQualityEstimation,
message.payload.isHTML
);
this.translation.translate(translationMessage);
// console.log("new translation message sent:", translationMessage, "msg sender lookuptable size:", this.messagesSenderLookupTable.size);
@ -324,6 +325,8 @@ class Mediator {
if (!this.inPageTranslation.started) {
this.inPageTranslation.withOutboundTranslation = message.withOutboundTranslation;
this.inPageTranslation.withQualityEstimation = message.withQualityEstimation;
if (!message.withQualityEstimation) this.inPageTranslation.addElement(document.getElementsByTagName("title")[0]);
this.inPageTranslation.addElement(document.body);
this.inPageTranslation.start(this.languageDetection.pageLanguage);
}
break;

Просмотреть файл

@ -17,14 +17,46 @@ class InPageTranslation {
this.mediator = mediator;
this.started = false;
this.language = null;
this.viewportNodeMap = new Map();
this.hiddenNodeMap = new Map();
this.nonviewportNodeMap = new Map();
this.updateMap = new Map();
/* timeout between First Translation Received -> Update DOM With Translations. */
this.updateTimeout = null;
this.UI_UPDATE_INTERVAL = 500;
this.messagesSent = new Set();
this.nodesSent = new WeakSet();
/*
* table of [Element]:Object to be submitted, and some info about them.
* Filled by queueTranslation(), emptied by dispatchTranslation().
*/
this.queuedNodes = new Map();
/*
* table of [Number]:Element of nodes that have been submitted, and are
* waiting for a translation.
*/
this.pendingTranslations = new Map();
/*
* table of [Element]:Number, inverse of pendingTranslations for easy
* cancelling of incoming responses when the node changed after
* submission of the request.
*/
this.submittedNodes = new Map();
/*
* queue with the translation text that they should
* be filled with once updateTimeout is reached. Filled by
* `queueTranslationResponse()` and emptied by `updateElements()`.
*/
this.translatedNodes = new Map();
/*
* set of elements that have been translated and should not be submitted
* again unless their contents changed.
*/
this.processedNodes = new WeakSet();
// all elements we're actively trying to translate.
this.targetNodes = new Set();
this.initialWordsInViewportReported = false;
this.withOutboundTranslation = null;
this.withQualityEstimation = null;
@ -42,7 +74,6 @@ class InPageTranslation {
// tags that are treated as "meh inline tags just send them to the translator"
this.inlineTags = new Set([
"abbr",
"a",
"b",
"em",
"i",
@ -69,7 +100,16 @@ class InPageTranslation {
"th",
"td",
"li",
"br"
"br",
]);
/*
* tags that give no hint about the inline-ness of their contents
* because of how they are used in modern web development.
*/
this.genericTags = new Set([
"a",
"span",
]);
// tags that we do not want to translate
@ -128,6 +168,7 @@ class InPageTranslation {
/*
* handled in isExcludedNode
* `*[lang]:not([lang|=${language}])`
* `*[translate=no]`
*/
])
@ -136,10 +177,10 @@ class InPageTranslation {
for (const mutation of mutationsList) {
switch (mutation.type) {
case "childList":
mutation.addedNodes.forEach(node => this.startTreeWalker(node));
mutation.addedNodes.forEach(this.restartTreeWalker.bind(this));
break;
case "characterData":
this.startTreeWalker(mutation.target);
this.restartTreeWalker(mutation.target);
break;
default:
break;
@ -147,7 +188,25 @@ class InPageTranslation {
}
});
});
}
addElement(node) {
// exclude non elements
if (!(node instanceof Element)) return;
// exclude nodes we're already tracking
if (this.targetNodes.has(node)) return;
this.targetNodes.add(node);
if (this.started) {
this.startTreeWalker(node);
this.observer.observe(node, {
characterData: true,
childList: true,
subtree: true
});
}
}
start(language) {
@ -165,14 +224,49 @@ class InPageTranslation {
// language we expect. If we find elements that do not match, nope out.
this.language = language;
const pageTitle = document.getElementsByTagName("title")[0];
if (pageTitle && !this.withQualityEstimation) {
this.queueTranslation(pageTitle);
}
this.startTreeWalker(document.body);
/*
* pre-construct the excluded node selector. Doing it here since it
* needs to know `language`. See `containsExcludedNode()`.
*/
this.excludedNodeSelector = `[lang]:not([lang|="${this.language}"]),[translate=no],${Array.from(this.excludedTags).join(",")},#OTapp`;
for (let node of this.targetNodes) this.startTreeWalker(node);
this.startMutationObserver();
}
/*
* stops the InPageTranslation process, stopping observing and regard any
* in-flight translation request as lost.
*/
stop() {
if (!this.started) return;
/*
* todo: cancel translation requests? Not really necessary at this level
* because stop() is called on disconnect from the background-script,
* and that script on its own will cancel translation requests from
* pages it is no longer connected to.
*/
this.stopMutationObserver();
/*
* remove all elements for which we haven't received a translation yet
* from the 'sent' list.
*/
this.submittedNodes.clear();
this.pendingTranslations.forEach(node => {
this.processedNodes.delete(node);
this.queueTranslation(node);
})
this.pendingTranslations.clear();
this.started = false;
}
addDebugStylesheet() {
const element = document.createElement("style");
element.textContent = "";
@ -227,25 +321,31 @@ class InPageTranslation {
el.classList.toggle("x-fxtranslations-bad",true);
}
});
}
}
/*
* start walking from `root` down through the DOM tree and decide which
* elements to enqueue for translation.
*/
startTreeWalker(root) {
// we found instances when the root can be null here. so, let's test
if (!root) return;
// if we have a textNode whose parent's parent was excluded we walk through its parent and reject it
if (root.nodeType === 3){
this.startTreeWalker(root.parentNode);
return;
/*
* if the parent itself is rejected, we don't translate any children.
* However, if this is a specifically targeted node, we don't do this
* check. Mainly so we can exclude <head>, but include <title>.
*/
if (!this.targetNodes.has(root)) {
for (let parent of this.ancestors(root)) {
if (this.validateNode(parent) === NodeFilter.FILTER_REJECT) return;
}
}
/*
* todo: Bit of added complicated logic to include `root` in the set
* bit of added complicated logic to include `root` in the set
* of nodes that is being evaluated. Normally TreeWalker will only
* look at the descendants.
*/
switch (this.validateNode(root)) {
switch (this.validateNodeForQueue(root)) {
// if even the root is already rejected, no need to look further
case NodeFilter.FILTER_REJECT:
return;
@ -267,23 +367,52 @@ class InPageTranslation {
const nodeIterator = document.createTreeWalker(
root,
NodeFilter.SHOW_ELEMENT,
this.validateNode.bind(this)
this.validateNodeForQueue.bind(this)
);
let currentNode;
// eslint-disable-next-line no-cond-assign
while (currentNode = nodeIterator.nextNode()) {
this.queueTranslation(currentNode);
}
} break;
default:
// here because of linter, this point is never reached.
break;
}
this.dispatchTranslations();
}
/*
* like startTreeWalker, but without the "oh ignore this element if it has
* already been submitted" bit. Use this one for submitting changed elements.
*/
restartTreeWalker(root) {
/*
* remove node from sent map: if it was send, we don't want it to update
* with an old translation once the translation response comes in.
*/
const id = this.submittedNodes.get(root);
if (id) {
this.submittedNodes.delete(root);
this.pendingTranslations.delete(id);
}
// remove node from processed list: we want to reprocess it.
this.processedNodes.delete(root);
// start submitting it again
this.startTreeWalker(root);
}
isElementInViewport(element) {
// eslint-disable-next-line no-param-reassign
if (element.nodeType === Node.TEXT_NODE) element = element.parentElement;
const rect = element.getBoundingClientRect();
return (
rect.top >= 0 &&
@ -294,19 +423,21 @@ class InPageTranslation {
}
isElementHidden(element) {
// eslint-disable-next-line no-param-reassign
if (element.nodeType === Node.TEXT_NODE) element = element.parentElement;
const computedStyle = window.getComputedStyle(element);
return computedStyle.display === "none" ||
computedStyle.visibility === "hidden" ||
element.offsetParent === null;
}
isParentTranslating(node){
/*
* if the parent of the node is already translating we should reject
* it since we already sent it to translation
*/
/*
* test whether any of the parent nodes are already in the process of being
* translated. If the parent of the node is already translating we should
* reject it since we already sent it to translation.
*/
isParentQueued(node){
// if the immediate parent is the body we just allow it
if (node.parentNode === document.body) {
return false;
@ -315,9 +446,8 @@ class InPageTranslation {
// let's iterate until we find either the body or if the parent was sent
let lastNode = node;
while (lastNode.parentNode) {
// console.log("isParentTranslating node", node, " isParentTranslating nodeParent ", lastNode.parentNode);
if (this.nodesSent.has(lastNode.parentNode)) {
return true;
if (this.queuedNodes.has(lastNode.parentNode)) {
return lastNode.parentNode;
}
lastNode = lastNode.parentNode;
}
@ -325,20 +455,27 @@ class InPageTranslation {
return false;
}
/*
* test whether this node should be treated as a wrapper of text, e.g.
* a `<p>`, or as a wrapper for block elements, e.g. `<div>`, based on
* its contents. The first we submit for translation, the second we try to
* split into smaller chunks of HTML for better latency.
*/
hasInlineContent(node) {
if (node.nodeType === Node.TEXT_NODE) return true;
let inlineElements = 0;
let blockElements = 0;
for (let child of node.childNodes) {
switch (child.nodeType) {
case 3: // textNode
if (child.textContent.trim().length > 0) inlineElements+=1;
case Node.TEXT_NODE:
if (child.textContent.trim().length > 0) inlineElements += 1;
break;
case 1: // element
if (this.inlineTags.has(child.nodeName.toLowerCase()) ||
(child.nodeName.toLowerCase() === "span" && this.hasInlineContent(child))) inlineElements+=1;
else blockElements+=1;
case Node.ELEMENT_NODE: // element
if (this.inlineTags.has(child.nodeName.toLowerCase())) inlineElements += 1;
else if (this.genericTags.has(child.nodeName.toLowerCase()) && this.hasInlineContent(child)) inlineElements += 1;
else blockElements += 1;
break;
default:
break;
@ -348,11 +485,21 @@ class InPageTranslation {
return inlineElements >= blockElements;
}
/*
* test whether any of the direct text nodes of this node are non-whitespace
* text nodes.
*
* For example:
* - `<p>test</p>`: yes
* - `<p> </p>`: no
* - `<p><b>test</b></p>`: no
*/
hasTextNodes(node) {
if (node.nodeType !== Node.ELEMENT_NODE) return false;
// there is probably a quicker way to do this
for (let child of node.childNodes) {
switch (child.nodeType) {
case 3: // textNode
case Node.TEXT_NODE: // textNode
if (child.textContent.trim() !== "") return true;
break;
default:
@ -363,7 +510,15 @@ class InPageTranslation {
return false;
}
/*
* test whether this is an element we do not want to translate. These
* are things like `<code>`, elements with a different `lang` attribute,
* and elements that have a `translate=no` attribute.
*/
isExcludedNode(node) {
// text nodes are never excluded
if (node.nodeType === Node.TEXT_NODE) return false;
// exclude certain elements
if (this.excludedTags.has(node.nodeName.toLowerCase())) return true;
@ -373,96 +528,137 @@ class InPageTranslation {
*/
if (node.lang && node.lang.substr(0,2) !== this.language) return true;
/*
* exclude elements that have an translate=no attribute
* (See https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/translate)
*/
if (node.translate === false || node.getAttribute("translate") === "no") return true;
// we should explicitly exclude the outbound translations widget
if (node.id === "OTapp") return true;
return false;
}
/*
* like `isExcludedNode` but looks at the full subtree. Used to see whether
* we can submit a subtree, or whether we should split it into smaller
* branches first to try to exclude more of the non-translatable content.
*/
containsExcludedNode(node) {
/*
* tODO describe this in terms of the function above, but I assume
* using querySelector is faster for now.
*/
if (node.nodeType === 1) {
node.querySelector(`[lang]:not([lang|="${this.language}"]), ${Array.from(this.excludedTags).join(",")}`);
}
return true;
return node.nodeType === Node.ELEMENT_NODE && node.querySelector(this.excludedNodeSelector);
}
/*
* used by TreeWalker to determine whether to ACCEPT, REJECT or SKIP a
* subtree. Only checks if the element is acceptable. It does not check
* whether the element has been translated already, which makes it usable
* on parent nodes to validate whether a child node is in a translatable
* context.
*
* Returns:
* - FILTER_ACCEPT: this subtree should be a translation request.
* - FILTER_SKIP : this node itself should not be a translation request
* but subtrees beneath it could be!
* - FILTER_REJECT: skip this node and everything beneath it.
*/
validateNode(node) {
if (node.nodeType === 1 && this.isExcludedNode(node)) {
node.setAttribute("x-bergamot-translated", "rejected is-excluded-node");
// little helper to add markings to elements for debugging
const mark = value => {
if (node.nodeType === Node.ELEMENT_NODE) node.setAttribute("x-bergamot-translated", value);
};
/*
* don't resubmit subtrees that are already in progress (unless their
* contents have been changed
*/
if (this.queuedNodes.has(node) || this.isParentQueued(node)) {
// node.setAttribute("x-bergamot-translated", "rejected is-parent-translating");
return NodeFilter.FILTER_REJECT;
}
if (node.nodeType === 1 && node.textContent.trim().length === 0) {
node.setAttribute("x-bergamot-translated", "rejected empty-text-content");
// exclude nodes that we don't want to translate
if (this.isExcludedNode(node)) {
mark("rejected is-excluded-node");
return NodeFilter.FILTER_REJECT;
}
if (this.isParentTranslating(node)) {
// node.setAttribute('x-bergamot-translated', 'rejected is-parent-translating');
// skip over subtrees that don"t have text
if (node.textContent.trim().length === 0) {
mark("rejected empty-text-content");
return NodeFilter.FILTER_REJECT;
}
if (node.nodeType === 1 && !this.hasInlineContent(node)) {
node.setAttribute("x-bergamot-translated", "skipped does-not-have-text-of-its-own");
if (!this.hasInlineContent(node)) {
mark("skipped does-not-have-text-of-its-own");
return NodeFilter.FILTER_SKIP; // otherwise dig deeper
}
if (node.nodeType === 1 && this.containsExcludedNode(node) && !this.hasTextNodes(node)) {
node.setAttribute("x-bergamot-translated", "skipped contains-excluded-node");
if (this.containsExcludedNode(node) && !this.hasTextNodes(node)) {
mark("skipped contains-excluded-node");
return NodeFilter.FILTER_SKIP; // otherwise dig deeper
}
return NodeFilter.FILTER_ACCEPT; // send whole node as 1 block
}
queueTranslation(node) {
/*
* used by TreeWalker to determine whether to ACCEPT, REJECT or SKIP a
* subtree. Checks whether element is acceptable, and hasn't been
* translated already.
*/
validateNodeForQueue(node) {
// skip nodes already seen (for the partial subtree change, or restart of the whole InPageTranslation process.)
if (this.processedNodes.has(node)) return NodeFilter.FILTER_REJECT;
/*
* let's store the node to keep its reference
* and send it to the translation worker
*/
return this.validateNode(node);
}
/*
* enqueue a node for translation. Called during startTreeWalker. Queues
* are emptied by dispatchTranslation().
*/
queueTranslation(node) {
this.translationsCounter += 1;
// debugging: mark the node so we can add CSS to see them
if (node.nodeType === 1) {
node.setAttribute("x-bergamot-translated", this.translationsCounter);
// let's categorize the elements on their respective hashmaps
if (this.isElementHidden(node)) {
// if the element is entirely hidden
this.hiddenNodeMap.set(this.translationsCounter, node);
} else if (this.isElementInViewport(node)) {
// if the element is present in the viewport
this.viewportNodeMap.set(this.translationsCounter, node);
} else {
// if the element is visible but not present in the viewport
this.nonviewportNodeMap.set(this.translationsCounter, node);
}
this.nodesSent.add(node);
}
if (node.nodeType === Node.ELEMENT_NODE) node.setAttribute("x-bergamot-translated", this.translationsCounter);
let priority = 2;
if (this.isElementHidden(node)) priority = 3;
else if (this.isElementInViewport(node)) priority = 1;
this.queuedNodes.set(node, {
id: this.translationsCounter,
priority
});
}
dispatchTranslations() {
this.reportWordsInViewport();
// we then submit for translation the elements in order of priority
this.processingNodeMap = "viewportNodeMap";
this.viewportNodeMap.forEach(this.submitTranslation, this);
this.processingNodeMap = "nonviewportNodeMap";
this.nonviewportNodeMap.forEach(this.submitTranslation, this);
this.processingNodeMap = "hiddenNodeMap";
this.hiddenNodeMap.forEach(this.submitTranslation, this);
const queuesPerPriority = [null, [], [], []] // priorities 1 to 3
this.queuedNodes.forEach((message, node) => {
queuesPerPriority[message.priority].push({ message, node });
});
for (let priority = 1; priority <= 3; priority += 1) {
queuesPerPriority[priority].forEach(({ message, node }) => {
this.submitTranslation(message, node);
});
}
this.queuedNodes.clear();
}
reportWordsInViewport() {
if (this.initialWordsInViewportReported || this.viewportNodeMap.size === 0) return;
if (this.initialWordsInViewportReported || this.queuedNodes.size === 0) return;
let viewPortWordsNum = 0;
for (const [, value] of this.viewportNodeMap.entries()) {
viewPortWordsNum += value.textContent.trim().split(/\s+/).length;
for (const [message, value] of this.queuedNodes.entries()) {
if (message.priority === 3) {
viewPortWordsNum += value.textContent.trim().split(/\s+/).length;
}
}
this.notifyMediator("reportViewPortWordsNum", viewPortWordsNum);
@ -470,40 +666,34 @@ class InPageTranslation {
this.initialWordsInViewportReported = true;
}
submitTranslation(node, key) {
if (this.messagesSent.has(key)) {
// if we already sent this message, we just skip it
return;
submitTranslation({ id }, node) {
// give each element an id that gets passed through the translation so we can later on reunite it.
if (node.nodeType === Node.ELEMENT_NODE) {
node.querySelectorAll("*").forEach((el, i) => {
el.dataset.xBergamotId = i;
});
}
/*
* give each element an id that gets passed through the translation so
* we can later on reunite it.
*/
node.querySelectorAll("*").forEach((el, i) => {
el.dataset.xBergamotId = i;
});
const text = node.nodeType === Node.ELEMENT_NODE
? node.innerHTML
: node.textContent;
if (text.trim().length === 0) return;
const text = node.innerHTML;
if (text.trim().length) {
/*
* send the content back to mediator in order to have the translation
* requested by it
*/
const payload = {
this.notifyMediator("translate", {
text,
isHTML: node.nodeType === Node.ELEMENT_NODE,
type: "inpage",
withOutboundTranslation: this.withOutboundTranslation,
withQualityEstimation: this.withQualityEstimation,
attrId: [
this.processingNodeMap,
key
],
};
this.notifyMediator("translate", payload);
this.messagesSent.add(key);
}
attrId: [id],
});
// keep reference to this node for once we receive a translation response.
this.pendingTranslations.set(id, node);
this.submittedNodes.set(node, id);
// also mark this node as not to be translated again unless the contents are changed (which the observer will pick up on)
this.processedNodes.add(node);
}
notifyMediator(command, payload) {
@ -511,16 +701,17 @@ class InPageTranslation {
}
startMutationObserver() {
this.observer.observe(document, {
characterData: true,
childList: true,
subtree: true
});
for (let node of this.targetNodes) {
this.observer.observe(node, {
characterData: true,
childList: true,
subtree: true
});
}
}
stopMutationObserver() {
this.observer.disconnect();
this.observer.disconnect();
}
@ -535,7 +726,7 @@ class InPageTranslation {
}
updateElements() {
const updateElement = (translatedHTML, node) => {
const updateElement = ({ translatedHTML }, node) => {
// console.groupCollapsed(computePath(node));
node.setAttribute("x-bergamot-translated", "");
@ -557,33 +748,6 @@ class InPageTranslation {
const clonedNodes = new Set();
const removeTextNodes = node => {
Array.from(node.childNodes).forEach(child => {
switch (child.nodeType) {
case Node.TEXT_NODE:
node.removeChild(child);
break;
case Node.ELEMENT_NODE:
removeTextNodes(child);
break;
default:
break;
}
});
};
// check (recursively) if a given node and all its children have only QE specific attributes
const hasOnlyQEAttributes = node => {
if (node.nodeType === Node.ELEMENT_NODE) {
if (node.nodeName.toUpperCase() !== "FONT") return false;
if (!node.getAttributeNames().every(attribute => this.qeAttributes.has(attribute))) return false;
for (let child of node.children) if (!hasOnlyQEAttributes(child)) return false;
}
return true;
};
/*
* merge the live tree (dst) with the translated tree (src) by
* re-using elements from the live tree.
@ -595,13 +759,16 @@ class InPageTranslation {
* an (indexed) reference to them since we will be adding them
* back, but possibly in a different order.
*/
const dstChildNodes = Object.fromEntries(Array.from(dst.childNodes)
.map(child => dst.removeChild(child))
const nodes = Array.from(dst.childNodes).map(child => dst.removeChild(child));
const dstChildNodes = Object.fromEntries(nodes
.filter(child => child.nodeType === Node.ELEMENT_NODE)
.map(child => [child.dataset.xBergamotId, child]));
const dstTextNodes = nodes.filter(child => child.nodeType === Node.TEXT_NODE);
// src (translated) dictates the order.
Array.from(src.childNodes).forEach(child => {
Array.from(src.childNodes).forEach((child, index, siblings) => {
// element nodes we try to use the already existing DOM nodes
if (child.nodeType === Node.ELEMENT_NODE) {
@ -619,7 +786,7 @@ class InPageTranslation {
* when QE is on) then just add the translated element child to the live
* element node.
*/
if (!child.hasAttribute("data-x-bergamot-id") && hasOnlyQEAttributes(child)) {
if (!child.hasAttribute("data-x-bergamot-id") && this.hasOnlyQEAttributes(child)) {
dst.appendChild(child);
} else {
console.warn(`[InPlaceTranslation] ${this.computePath(child, scratch)} Could not find counterpart for`, child.dataset.xBergamotId, dstChildNodes, child);
@ -650,7 +817,20 @@ class InPageTranslation {
* the one that came out of translation doesn't?
*/
console.warn(`[InPlaceTranslation] ${this.computePath(child, scratch)} Child ${child.outerHTML} has no text but counterpart ${counterpart.outerHTML} does`);
removeTextNodes(counterpart); // this should not be necessary
/*
* todo: This scenario might be caused by one of two
* causes: 1) element was duplicated by translation
* but then not given text content. This happens on
* Wikipedia articles for example.
* Or 2) the translator messed up and could not
* translate the text. This happens on Youtube in the
* language selector. In that case, having the original
* text is much better than no text at all.
* To make sure it is this case, and not option 2
* we check whether this is the only occurrence.
*/
if (siblings.some((sibling, i) => sibling.nodeType === Node.ELEMENT_NODE && index !== i && child.dataset.xBergamotId === sibling.dataset.xBergamotId)) this.removeTextNodes(counterpart);
}
/*
@ -658,9 +838,13 @@ class InPageTranslation {
* it has been synced with the translated text and order.
*/
dst.appendChild(counterpart);
} else {
// all other node types we just copy in directly
dst.appendChild(child);
} else if (child.nodeType === Node.TEXT_NODE) {
let counterpart = dstTextNodes.shift();
if (typeof counterpart !== "undefined") counterpart.data = child.data;
else counterpart = child;
dst.appendChild(counterpart);
}
});
@ -679,31 +863,58 @@ class InPageTranslation {
merge(node, scratch);
return node;
};
const updateTextNode = ({ translatedHTML }, node) => {
/*
* remove node again from nodesSent because someone might change
* the innerHTML or add children, and then we want to translate
* those.
* TODO: what if a node was mutated while translation was pending?
* Will that mutation then be ignored?
* regardless of withQualityEstimation, if translatedHTML is empty
* we have an empty string as output. Which is useless.
*/
this.nodesSent.delete(node);
if (translatedHTML.trim().length === 0) {
console.warn("[InPlaceTranslation] text node", node, "translated to <empty string>");
return node;
}
/*
* is this a good idea?
* this.nodesSent.delete(node);
* console.groupEnd(computePath(node));
* when we're getting quality estimations back, translatedHTML is
* indeed actual HTML with font tags containing that info.
*/
if (this.withQualityEstimation) {
const nonLiveDomContainer = document.createElement("template");
nonLiveDomContainer.innerHTML = translatedHTML;
const fragment = document.createDocumentFragment();
for (let child of nonLiveDomContainer.content.childNodes) {
if (this.hasOnlyQEAttributes(child)) {
fragment.appendChild(child);
}
}
node.parentNode.replaceChild(fragment, node);
return fragment;
}
node.textContent = translatedHTML;
return node;
};
// pause observing mutations
this.stopMutationObserver();
try {
this.updateMap.forEach(updateElement);
// let's test this here to prevent unnecessary traversals
if (this.withQualityEstimation) this.reportQualityEstimation(this.updateMap.keys());
this.updateMap.clear();
const touchedNodes = Array.from(this.translatedNodes, ([node, message]) => {
switch (node.nodeType) {
case Node.TEXT_NODE:
return updateTextNode(message, node);
case Node.ELEMENT_NODE:
return updateElement(message, node);
default:
return node; // never happens
}
});
if (this.withQualityEstimation) this.reportQualityEstimation(touchedNodes);
this.translatedNodes.clear();
this.updateTimeout = null;
if (this.withQualityEstimation) this.addQualityClasses();
} finally {
@ -736,35 +947,29 @@ class InPageTranslation {
}
enqueueElement(translationMessage) {
const [
hashMapName,
idCounter
] = translationMessage.attrId;
const translatedText = translationMessage.translatedParagraph;
// console.log("no enqueue", translatedText);
let targetNode = null;
switch (hashMapName) {
case "hiddenNodeMap":
targetNode = this.hiddenNodeMap.get(idCounter);
this.hiddenNodeMap.delete(idCounter);
break;
case "viewportNodeMap":
targetNode = this.viewportNodeMap.get(idCounter);
this.viewportNodeMap.delete(idCounter);
break;
case "nonviewportNodeMap":
targetNode = this.nonviewportNodeMap.get(idCounter);
this.nonviewportNodeMap.delete(idCounter);
break;
default:
break;
}
this.messagesSent.delete(idCounter);
this.updateMap.set(targetNode, translatedText);
// we finally schedule the UI update
if (!this.updateTimeout) {
this.updateTimeout = setTimeout(this.updateElements.bind(this),this.UI_UPDATE_INTERVAL);
const [id] = translationMessage.attrId;
const translatedHTML = translationMessage.translatedParagraph;
// look up node by message id. This can fail
const node = this.pendingTranslations.get(id);
if (typeof node === "undefined") {
console.debug("[in-page-translation] Message",id,"is not found in pendingTranslations");
return;
}
// prune it.
this.pendingTranslations.delete(id);
// node still exists! Remove node -> (pending) message mapping
this.submittedNodes.delete(node);
// queue node to be populated with translation next update.
this.translatedNodes.set(node, { id, translatedHTML });
// we schedule the UI update
if (!this.updateTimeout) this.updateTimeout = setTimeout(this.updateElements.bind(this), this.submittedNodes.size === 0
? 0
: this.UI_UPDATE_INTERVAL);
}
computePath(node, root) {
@ -778,4 +983,37 @@ class InPageTranslation {
else if (node.className) path += `.${Array.from(node.classList).join(".")}`;
return path;
}
*ancestors(node) {
for (let parent = node.parentNode; parent && parent !== document.documentElement; parent = parent.parentNode) yield parent;
}
/*
* check (recursively) if a given node and all its children have only QE specific attributes
*/
hasOnlyQEAttributes(node) {
if (node.nodeType === Node.ELEMENT_NODE) {
if (node.nodeName.toUpperCase() !== "FONT") return false;
if (!node.getAttributeNames().every(attribute => this.qeAttributes.has(attribute))) return false;
for (let child of node.children) if (!this.hasOnlyQEAttributes(child)) return false;
}
return true;
}
removeTextNodes(node) {
Array.from(node.childNodes).forEach(child => {
switch (child.nodeType) {
case Node.TEXT_NODE:
node.removeChild(child);
break;
case Node.ELEMENT_NODE:
this.removeTextNodes(child);
break;
default:
break;
}
});
}
}