From 8e304e73f3b83288962b41f316f655fcd0a4295e Mon Sep 17 00:00:00 2001 From: Andre Date: Thu, 25 Nov 2021 05:46:15 -0700 Subject: [PATCH] Implementing batching --- .../controller/translation/Translation.js | 31 ++++++++- .../translation/translationWorker.js | 64 +++++++++++-------- extension/manifest.json | 4 +- extension/mediator.js | 13 ++-- .../translation => model}/Queue.js | 0 extension/view/js/InPageTranslation.js | 5 +- 6 files changed, 78 insertions(+), 39 deletions(-) rename extension/{controller/translation => model}/Queue.js (100%) diff --git a/extension/controller/translation/Translation.js b/extension/controller/translation/Translation.js index dbcbdd3..4f95d21 100644 --- a/extension/controller/translation/Translation.js +++ b/extension/controller/translation/Translation.js @@ -9,6 +9,10 @@ class Translation { constructor (mediator){ this.translationsMessagesCounter = 0; + this.TRANSLATION_INTERVAL = 100; // ms + this.MAX_TRANSLATION_MSGS = 500; // max translations to process per batch + this.translateSchedule = null; // holds a reference to the translation setTimeout + this.translationMessageBuffer = new Queue(); this.mediator = mediator; const engineLocalPath = browser.runtime.getURL("controller/translation/bergamot-translator-worker.js"); const engineRemoteRegistry = browser.runtime.getURL("model/engineRegistry.js"); @@ -62,12 +66,37 @@ class Translation { * the message to the worker */ translate(translationMessage) { + // add this message to the queue + this.translationMessageBuffer.enqueue(translationMessage); + + // and schedule an update if required + if (!this.translateSchedule) { + this.translateSchedule = setTimeout(this.submitMessages.bind(this), this.TRANSLATION_INTERVAL); + } + } + + submitMessages() { + // timeout invoked. let's submit the messages + const messagesToGo = new Array(); + + // we'll process until the buffer is empty or we reach + while (!this.translationMessageBuffer.isEmpty() && messagesToGo.length < this.MAX_TRANSLATION_MSGS) { + const message = this.translationMessageBuffer.dequeue(); + messagesToGo.push(message); + } if (this.translationWorker) { this.translationWorker.postMessage([ "translate", - translationMessage + messagesToGo ]); } + + // and schedule an update if required + if (this.translationMessageBuffer.length() > 0) { + setTimeout(this.submitMessages.bind(this), this.TRANSLATION_INTERVAL); + } + // inform it is complete + this.translateSchedule = null; } // eslint-disable-next-line max-params diff --git a/extension/controller/translation/translationWorker.js b/extension/controller/translation/translationWorker.js index 3beb121..ed239b2 100644 --- a/extension/controller/translation/translationWorker.js +++ b/extension/controller/translation/translationWorker.js @@ -71,30 +71,36 @@ class TranslationHelper { consumeTranslationQueue() { while (this.translationQueue.length() > 0) { - const translationMessage = this.translationQueue.dequeue(); + const translationMessagesBatch = this.translationQueue.dequeue(); Promise.resolve().then(function () { - // if there's a paragraph, then we translate - if (translationMessage.sourceParagraph) { + if (translationMessagesBatch) { - const t0 = performance.now(); - // translate the input, which is a vector; the result is a vector let total_words = 0; - translationMessage.sourceParagraph.forEach(paragraph => { - total_words += paragraph.trim().split(" ").length; - }) + translationMessagesBatch.forEach(message => { + message.sourceParagraph.forEach(paragraph => { + total_words += paragraph.trim().split(" ").length; + }); + }); - const translation = this.translate( - translationMessage.sourceLanguage, - translationMessage.targetLanguage, - translationMessage.sourceParagraph + console.log(" twarray to translate:", translationMessagesBatch); + const t0 = performance.now(); + const translationResultBatch = this.translate( + translationMessagesBatch ); const timeElapsed = [total_words, performance.now() - t0]; - - // now that we have a translation, let's report to the mediator - translationMessage.translatedParagraph = [translation, timeElapsed]; + console.log(" twarray translated:", translationMessagesBatch); + + // now that we have the paragraphs back, let's reconstruct them. + // we trust the engine will return the paragraphs always in the same order + // we requested + translationResultBatch.forEach( (result, index) => { + translationMessagesBatch[index].translatedParagraph = result; + }); + //and then report to the mediator postMessage([ "translationComplete", - translationMessage + translationMessagesBatch, + timeElapsed ]); } }.bind(this)); @@ -119,8 +125,8 @@ class TranslationHelper { this.engineState = this.ENGINE_STATE.LOADING; // and load the module this.loadTranslationEngine( - message.sourceLanguage, - message.targetLanguage + message[0].sourceLanguage, + message[0].targetLanguage ); this.translationQueue.enqueue(message); break; @@ -390,20 +396,23 @@ class TranslationHelper { return alignedMemory; } - translate (from, to, paragraphs) { + translate (messages) { /* * if none of the languages is English then perform translation with * english as a pivot language. */ + const from = messages[0].sourceLanguage; + const to = messages[0].targetLanguage; + if (from !== "en" && to !== "en") { - let translatedParagraphsInEnglish = this.translateInvolvingEnglish(from, "en", paragraphs); + let translatedParagraphsInEnglish = this.translateInvolvingEnglish(from, "en", messages); return this.translateInvolvingEnglish("en", to, translatedParagraphsInEnglish); } - return this.translateInvolvingEnglish(from, to, paragraphs); + return this.translateInvolvingEnglish(from, to, messages); } - translateInvolvingEnglish (from, to, paragraphs) { + translateInvolvingEnglish (from, to, messages) { const languagePair = `${from}${to}`; if (!this.translationModels.has(languagePair)) { throw Error(`Please load translation model '${languagePair}' before translating`); @@ -417,14 +426,13 @@ class TranslationHelper { const responseOptions = { qualityScores: true, alignment: false, html: true }; let input = new this.WasmEngineModule.VectorString(); - // initialize the input - paragraphs.forEach(paragraph => { + messages.forEach(message => { // prevent empty paragraph - it breaks the translation - if (paragraph.trim() === "") { + if (message.sourceParagraph[0].trim() === "") { return; } - input.push_back(paragraph); - }) + input.push_back(message.sourceParagraph[0]); + }); // translate the input, which is a vector; the result is a vector let result = this.translationService.translate(translationModel, input, responseOptions); @@ -481,7 +489,7 @@ const translationHelper = new TranslationHelper(postMessage); onmessage = function(message) { switch (message.data[0]) { case "configEngine": - importScripts("Queue.js"); + importScripts("/model/Queue.js"); importScripts(message.data[1].engineLocalPath); importScripts(message.data[1].engineRemoteRegistry); importScripts(message.data[1].modelRegistry); diff --git a/extension/manifest.json b/extension/manifest.json index 516fb2c..ea64384 100644 --- a/extension/manifest.json +++ b/extension/manifest.json @@ -26,7 +26,7 @@ "matches": [""], "js": [ "controller/LanguageDetection.js", - "controller/translation/Queue.js", + "model/Queue.js", "controller/translation/Translation.js", "controller/translation/TranslationMessage.js", "controller/translation/translationWorker.js", @@ -41,7 +41,7 @@ ], "web_accessible_resources": [ "ot.html", - "controller/translation/Queue.js", + "model/Queue.js", "controller/translation/translationWorker.js" ], "incognito": "spanning", diff --git a/extension/mediator.js b/extension/mediator.js index 397274e..aff0c91 100644 --- a/extension/mediator.js +++ b/extension/mediator.js @@ -99,14 +99,17 @@ class Mediator { /* * received the translation complete signal * from the translation object. so we lookup the sender - * in order to route the response back. in this this, it can be + * in order to route the response back, which can be * OutbountTranslation, InPageTranslation etc.... */ - this.messagesSenderLookupTable.get(message.payload[1].messageID) - .mediatorNotification(message); - this.messagesSenderLookupTable.delete(message.payload[1].messageID); + message.payload[1].forEach(translationMessage => { + this.messagesSenderLookupTable.get(translationMessage.messageID) + .mediatorNotification(translationMessage); + this.messagesSenderLookupTable.delete(translationMessage.messageID); + }); + // eslint-disable-next-line no-case-declarations - const wordsPerSecond = this.telemetry.addAndGetTranslationTimeStamp(message.payload[1].translatedParagraph[1]); + const wordsPerSecond = this.telemetry.addAndGetTranslationTimeStamp(message.payload[2]); if (this.statsMode) { // if the user chose to see stats in the infobar, we display them diff --git a/extension/controller/translation/Queue.js b/extension/model/Queue.js similarity index 100% rename from extension/controller/translation/Queue.js rename to extension/model/Queue.js diff --git a/extension/view/js/InPageTranslation.js b/extension/view/js/InPageTranslation.js index 453b5a6..c6de281 100644 --- a/extension/view/js/InPageTranslation.js +++ b/extension/view/js/InPageTranslation.js @@ -132,7 +132,6 @@ class InPageTranslation { this.hiddenNodeMap.forEach(this.submitTranslation, this); } - submitTranslation(node, key) { if (this.messagesSent.has(key)) { // if we already sent this message, we just skip it @@ -209,8 +208,8 @@ class InPageTranslation { const [ hashMapName, idCounter - ] = translationMessage.payload[1].attrId; - const translatedText = translationMessage.payload[1].translatedParagraph[0].join("\n\n") + ] = translationMessage.attrId; + const translatedText = translationMessage.translatedParagraph; let targetNode = null; switch (hashMapName) { case "hiddenNodeMap":