Setup WASM test infrastructure for CI (#920)

* Rename task `inference-test` to `inference-test-local`

This commit clarifies that these tests are the local C++
unit tests, now that we will be adding WASM JS tests to CI.

* Remove unused WASM test-page files

This removes the files for building a WASM test page that
functions very similarly to the `about:translations` page
in Firefox.

I kept these files around during the initial clone of the
repository, because I wasn't sure yet if I wanted to use
anything here for testing in CI, but I think it will be
cleaner to just make new CI tests.

* Add vitest infrastructure for WASM CI tests

https://vitest.dev/ seems like a simple, easy-to-use
JS testing framework that is installable and configurable
with NPM.

This patch only introduces a stubbed test file with a basic
assertion, but I plan to use this to test the WASM bindings
and outputs more thoroughly in a subsequent PR.

* Add task `inference-test-wasm`

This commit adds a new task for `inference-test-wasm` that also
runs in CI on relevant PRs that touch inference-related sections
of the code.

* Ensure macOS host dependencies build on 1 thread

There is an issue with building WASM using multiple
threads on Aarch64 macOS chips. It works on a single
thread. This patch exposes the host operating system
type to the Docker container so that the `inference-test-wasm`
task can determine how many threads to use.
This commit is contained in:
Erik Nordin 2024-11-08 13:30:22 -06:00 коммит произвёл GitHub
Родитель 28de0c8c2d
Коммит db60f54acd
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
22 изменённых файлов: 1374 добавлений и 3248 удалений

Просмотреть файл

@ -87,18 +87,31 @@ tasks:
- >-
./inference/scripts/build-local.sh
inference-test:
desc: Run inference tests.
cmds:
- >-
./inference/scripts/unit-tests.sh
inference-build-wasm:
desc: Build inference engine WASM.
cmds:
- >-
./inference/scripts/build-wasm.py {{.CLI_ARGS}}
inference-test-local:
desc: Run inference build-local C++ tests.
cmds:
- >-
./inference/scripts/unit-tests.sh
inference-test-wasm:
desc: Run inference build-wasm JS tests.
deps:
- task: inference-build-wasm
vars:
# When the host system is macOS, the WASM build fails when
# building with multiple threads in the Docker container.
# If the host system is macOS, pass -j 1.
CLI_ARGS: '{{if eq (env "HOST_OS") "Darwin"}}-j 1{{end}}'
cmds:
- >-
cd inference/wasm/tests && npm install && npm run test
lint-black:
desc: Checks the styling of the Python code with Black.
deps: [poetry-install-black]

Просмотреть файл

@ -5,7 +5,7 @@ set -e
cd "$(dirname $0)/.."
# Ensure script is running within docker
./scripts/detect-docker.sh inference-test
./scripts/detect-docker.sh inference-test-local
# Check if build-local/src/tests/units directory exists
if [ ! -d "build-local/src/tests/units" ]; then

Просмотреть файл

@ -1,21 +0,0 @@
import * as readline from 'node:readline/promises';
import {stdin, stdout} from 'node:process';
import {BatchTranslator} from "./translator.js";
// Command-line entry point: reads lines from stdin and prints the translation
// of each line (from "en" to "es") to stdout, one request at a time.
const rl = readline.createInterface({input: stdin, output: stdout});
const translator = new BatchTranslator();

try {
  for await (const line of rl) {
    const response = await translator.translate({
      from: "en",
      to: "es",
      text: line,
      html: false,
      qualityScores: false
    });
    console.log(response.target.text);
  }
} finally {
  // Release the input stream and the translator even when a translation
  // failed. `delete()` is async, so await it instead of leaving the promise
  // floating.
  rl.close();
  await translator.delete();
}

Просмотреть файл

@ -1,39 +0,0 @@
{
"name": "@browsermt/bergamot-translator",
"version": "0.4.9",
"description": "Cross platform C++ library focusing on optimized machine translation on the consumer-grade device.",
"homepage": "https://github.com/browsermt/bergamot-translator#readme",
"repository": {
"type": "git",
"url": "git+ssh://git@github.com/browsermt/bergamot-translator.git"
},
"keywords": [
"machine",
"translation"
],
"author": "",
"license": "MPL-2.0",
"bugs": {
"url": "https://github.com/browsermt/bergamot-translator/issues"
},
"type": "module",
"main": "translator.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"files": [
"worker/bergamot-translator-worker.js",
"worker/bergamot-translator-worker.wasm",
"worker/translator-worker.js",
"translator.js",
"main.js"
],
"config": {
"emscripten_version": "3.1.8"
},
"scripts": {
"prepare": "test -f worker/bergamot-translator-worker.wasm || npm run build",
"build": "mkdir -p ../../build-wasm && docker run --rm -v $(realpath ../../):/src -v $(realpath ../../build-wasm):/build -v $(pwd)/worker:/dst -w /build emscripten/emsdk:$npm_package_config_emscripten_version sh -c \"emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=off /src && emmake make -j2 && cp bergamot-translator-worker.wasm bergamot-translator-worker.js /dst\"",
"test": "echo \"Hello world!\" | node main.js"
}
}

Просмотреть файл

@ -1,879 +0,0 @@
/**
* @typedef {Object} TranslationRequest
* @property {String} from
* @property {String} to
* @property {String} text
* @property {Boolean} html
* @property {Integer?} priority
*/
/**
* @typedef {Object} TranslationResponse
* @property {TranslationRequest} request
* @property {{text: string}} target
*/
/**
 * NodeJS compatibility, a thin WebWorker layer around node:worker_threads.
 *
 * Installed as `globalThis.Worker` only when there is no `window.Worker`.
 * The underlying worker thread is created asynchronously (the module is
 * imported dynamically), so every method defers to the promise of that
 * worker.
 */
if (!(typeof window !== 'undefined' && window.Worker)) {
  globalThis.Worker = class {
    /**
     * Promise of the node:worker_threads Worker backing this object.
     * @type {Promise<import('node:worker_threads').Worker>}
     */
    #worker;

    /**
     * @param {string | URL} url script to run in the worker thread
     */
    constructor(url) {
      // A plain promise chain (instead of an async Promise executor) makes
      // sure that a failing import or a throwing `new Worker(url)` rejects
      // this promise rather than leaving it pending forever.
      this.#worker = import(/* webpackIgnore: true */ 'node:worker_threads')
        .then(({Worker}) => new Worker(url));
    }

    /**
     * Registers `callback` for `eventName`. The payload emitted by the
     * worker thread is wrapped as `{data}` to mimic a DOM message event.
     */
    addEventListener(eventName, callback) {
      this.#worker.then(worker => worker.on(eventName, (data) => callback({data})));
    }

    /** Sends `message` to the worker thread once it exists. */
    postMessage(message) {
      this.#worker.then(worker => worker.postMessage(message));
    }

    /** Stops the worker thread once it exists. */
    terminate() {
      this.#worker.then(worker => worker.terminate());
    }
  };
}
/**
 * Thrown when a pending translation is replaced by another newer pending
 * translation.
 *
 * Adds no members of its own; it exists so callers can tell this case apart
 * with `instanceof`. `LatencyOptimisedTranslator.translate()` rejects the
 * previously pending request with this error.
 */
export class SupersededError extends Error {}
/**
 * Thrown when a translation was removed from the queue.
 *
 * Adds no members of its own; it exists so callers can tell this case apart
 * with `instanceof`. In this file it is used when a request is filtered out
 * by `BatchTranslator.remove()`, when a translator is deleted while a request
 * is pending, and when an abort signal fires.
 */
export class CancelledError extends Error {}
/**
 * Wrapper around bergamot-translator loading and model management.
 *
 * Fetches the model registry, downloads (and caches) the files of a model,
 * works out which model(s) are needed for a language pair (optionally
 * pivoting through `pivotLanguage`) and starts translation workers.
 */
export class TranslatorBacking {
  /**
   * @param {{
   *  cacheSize?: number,
   *  useNativeIntGemm?: boolean,
   *  downloadTimeout?: number,
   *  registryUrl?: string
   *  pivotLanguage?: string?
   *  onerror?: (err: Error)
   * }} options
   */
  constructor(options) {
    this.options = options || {};

    this.registryUrl = this.options.registryUrl || 'https://bergamot.s3.amazonaws.com/models/index.json';

    /**
     * Download timeout in milliseconds. A falsy value (0, NaN) disables the
     * timeout, see `fetch()`.
     * @type {number}
     */
    this.downloadTimeout = 'downloadTimeout' in this.options
      ? parseInt(this.options.downloadTimeout, 10)
      : 60000;

    /**
     * registry of all available models and their urls
     * @type {Promise<Model[]>}
     */
    this.registry = this.loadModelRegistery();

    /**
     * Map of downloaded model data files as buffers per model. Keyed by
     * `JSON.stringify({from, to})`.
     * @type {Map<string, Promise<Object>>}
     */
    this.buffers = new Map();

    /**
     * Language to pivot through when there is no direct model.
     * @type {string?}
     */
    this.pivotLanguage = 'pivotLanguage' in this.options ? this.options.pivotLanguage : 'en';

    /**
     * A map of language-pairs to a list of models you need for it. Keyed by
     * `JSON.stringify({from, to})`.
     * @type {Map<string, Promise<{from:string,to:string}[]>>}
     */
    this.models = new Map();

    /**
     * Error handler for all errors that are async, not tied to a specific
     * call and that are unrecoverable.
     * @type {(error: Error)}
     */
    this.onerror = this.options.onerror || (err => console.error('WASM Translation Worker error:', err));
  }

  /**
   * Loads a worker thread, and wraps it in a message passing proxy. I.e. it
   * exposes the entire interface of TranslationWorker here, and all calls
   * to it are async. Do note that you can only pass arguments that survive
   * being copied into a message.
   * @return {Promise<{worker:Worker, exports:Proxy<TranslationWorker>}>}
   */
  async loadWorker() {
    const worker = new Worker(new URL('./worker/translator-worker.js', import.meta.url));

    // Incremental counter to derive request/response ids from.
    let serial = 0;

    /**
     * Map of pending requests
     * @type {Map<number,{accept:(any), reject:(Error), callsite:{message:string, stack:string}}>}
     */
    const pending = new Map();

    // Sends a request to the worker; settles when the matching response
    // (same id) comes back.
    const call = (name, ...args) => new Promise((accept, reject) => {
      const id = ++serial;
      pending.set(id, {
        accept,
        reject,
        callsite: { // for debugging which call caused the error
          message: `${name}(${args.map(arg => String(arg)).join(', ')})`,
          stack: new Error().stack
        }
      });
      worker.postMessage({id, name, args});
    });

    // Receives responses and settles the matching pending request.
    worker.addEventListener('message', (event) => {
      const {id, result, error} = event.data;

      if (!pending.has(id)) {
        console.debug('Received message with unknown id:', event);
        throw new Error(`BergamotTranslator received response from worker to unknown call '${id}'`);
      }

      const {accept, reject, callsite} = pending.get(id);
      pending.delete(id);

      if (error !== undefined) {
        // Rebuild an Error from the copied error data and append the call
        // site so the failure can be traced back to the request.
        reject(Object.assign(new Error(), error, {
          message: error.message + ` (response to ${callsite.message})`,
          stack: error.stack ? `${error.stack}\n${callsite.stack}` : callsite.stack
        }));
      } else {
        accept(result);
      }
    });

    // General worker errors, not tied to a specific call.
    worker.addEventListener('error', this.onerror.bind(this));

    // Await initialisation. This will also nicely error out if the WASM
    // runtime fails to load.
    await call('initialize', this.options);

    // `exports` is a Proxy on which every property is a function that
    // forwards the call through the message passing channel.
    return {
      worker,
      exports: new Proxy({}, {
        get(target, name, receiver) {
          // Prevent this object from being marked "then-able"
          if (name !== 'then')
            return (...args) => call(name, ...args);
        }
      })
    };
  }

  /**
   * Loads the model registry from `registryUrl` and turns it into a list
   * with one entry per model. The first two characters of each registry key
   * become `from`, the next two become `to`.
   * @return {Promise<Array<{
   *   from: string,
   *   to: string,
   *   files: {
   *     [part:string]: {
   *       name: string,
   *       size: number,
   *       expectedSha256Hash: string
   *     }
   *   }
   * }>>}
   */
  async loadModelRegistery() {
    const response = await fetch(this.registryUrl, {credentials: 'omit'});
    const registry = await response.json();

    // Add 'from' and 'to' keys for each model.
    return Object.entries(registry).map(([key, files]) => ({
      from: key.substring(0, 2),
      to: key.substring(2, 4),
      files
    }));
  }

  /**
   * Gets or loads translation model data. Caching wrapper around
   * `loadTranslationModel()`.
   * @param {{from:string, to:string}}
   * @param {{signal:AbortSignal?}?} options
   * @return {Promise<{
   *   model: ArrayBuffer,
   *   vocabs: ArrayBuffer[],
   *   shortlist: ArrayBuffer,
   *   qualityModel: ArrayBuffer?,
   *   config: Object
   * }>}
   */
  getTranslationModel({from, to}, options) {
    const key = JSON.stringify({from, to});

    if (!this.buffers.has(key)) {
      const promise = this.loadTranslationModel({from, to}, options);

      // set the promise so we return the same promise when its still pending
      this.buffers.set(key, promise);

      // But if loading fails, remove the promise again so we can try again later
      promise.catch(err => this.buffers.delete(key));
    }

    return this.buffers.get(key);
  }

  /**
   * Downloads a translation model and returns a set of
   * ArrayBuffers. These can then be passed to a TranslationWorker thread
   * to instantiate a TranslationModel inside the WASM vm.
   * @param {{from:string, to:string}}
   * @param {{signal:AbortSignal?}?} options
   * @return {Promise<{
   *   model: ArrayBuffer,
   *   vocabs: ArrayBuffer[],
   *   shortlist: ArrayBuffer,
   *   qualityModel: ArrayBuffer?,
   *   config: Object
   * }>}
   * @throws {CancelledError} when `options.signal` is (or gets) aborted
   * @throws {Error} when the registry has no model for this pair, a file
   *   cannot be fetched, or no vocabulary file can be identified
   */
  async loadTranslationModel({from, to}, options) {
    const markName = `loadTranslationModule.${JSON.stringify({from, to})}`;
    performance.mark(markName);

    // Find that model in the registry which will tell us about its files
    const entries = (await this.registry).filter(model => model.from === from && model.to === to);

    // `filter()` always returns an array, so test its length.
    if (!entries.length)
      throw new Error(`No model for '${from}' -> '${to}'`);

    const files = entries[0].files;
    const signal = options?.signal;

    // An already aborted signal never fires its 'abort' event again, so
    // check for it before starting any download.
    if (signal?.aborted)
      throw new CancelledError('abort signal');

    // Promise that never resolves, but rejects when the abort signal hits.
    // `abort` has to be created inside the executor because that is the only
    // place where `reject` is in scope.
    let abort;
    const escape = new Promise((accept, reject) => {
      abort = () => reject(new CancelledError('abort signal'));
    });

    if (signal)
      signal.addEventListener('abort', abort);

    let buffers;

    try {
      // Download all files mentioned in the registry entry. Race the promise
      // of all fetch requests, and a promise that rejects on the abort signal
      buffers = Object.fromEntries(await Promise.race([
        Promise.all(Object.entries(files).map(async ([part, file]) => {
          // Special case where qualityModel is not part of the model, and this
          // should also catch the `config` case.
          if (file?.name === undefined)
            return [part, null];

          try {
            return [part, await this.fetch(file.name, file.expectedSha256Hash, options)];
          } catch (cause) {
            throw new Error(`Could not fetch ${file.name} for ${from}->${to} model`, {cause});
          }
        })),
        escape
      ]));
    } finally {
      // Nothing to abort any more (also when the download failed), so do not
      // leave the listener attached to the caller's signal.
      if (signal)
        signal.removeEventListener('abort', abort);
    }

    performance.measure('loadTranslationModel', markName);

    let vocabs = [];

    if (buffers.vocab)
      vocabs = [buffers.vocab];
    else if (buffers.trgvocab && buffers.srcvocab)
      vocabs = [buffers.srcvocab, buffers.trgvocab];
    else
      throw new Error(`Could not identify vocab files for ${from}->${to} model among: ${Array.from(Object.keys(files)).join(' ')}`);

    const config = {};

    // For the Ukrainian models we need to override the gemm-precision
    if (files.model.name.endsWith('intgemm8.bin'))
      config['gemm-precision'] = 'int8shiftAll';

    // If quality estimation is used, we need to turn off skip-cost. Turning
    // this off causes quite the slowdown.
    if (files.qualityModel)
      config['skip-cost'] = false;

    // Allow the registry to also specify marian configuration parameters
    if (files.config)
      Object.assign(config, files.config);

    // Translate to generic bergamot-translator format that also supports
    // separate vocabularies for input & output language, and calls 'lex'
    // a more descriptive 'shortlist'.
    return {
      model: buffers.model,
      shortlist: buffers.lex,
      vocabs,
      qualityModel: buffers.qualityModel,
      config
    };
  }

  /**
   * Helper to download file from the web. Aborts the download after
   * `downloadTimeout` milliseconds (if set) or when `extra.signal` aborts.
   * The checksum is handed to `fetch()` as subresource integrity, except
   * when there is no `window` (NodeJS), where it is not verified.
   * @param {string} url
   * @param {string?} checksum sha256 checksum as hexadecimal string
   * @param {{signal:AbortSignal}?} extra fetch options
   * @returns {Promise<ArrayBuffer>}
   */
  async fetch(url, checksum, extra) {
    // Rig up a timeout cancel signal for our fetch
    const controller = new AbortController();
    const abort = () => controller.abort();
    const timeout = this.downloadTimeout ? setTimeout(abort, this.downloadTimeout) : null;

    try {
      // Also maintain the original abort signal
      if (extra?.signal)
        extra.signal.addEventListener('abort', abort);

      const options = {
        credentials: 'omit',
        signal: controller.signal,
      };

      if (checksum)
        options['integrity'] = `sha256-${this.hexToBase64(checksum)}`;

      // Disable the integrity check for NodeJS because of
      // https://github.com/nodejs/undici/issues/1594
      if (typeof window === 'undefined')
        delete options['integrity'];

      // Start downloading the url, using the hex checksum to ask
      // `fetch()` to verify the download using subresource integrity
      const response = await fetch(url, options);

      // Finish downloading (or crash due to timeout)
      return await response.arrayBuffer();
    } finally {
      if (timeout)
        clearTimeout(timeout);

      if (extra?.signal)
        extra.signal.removeEventListener('abort', abort);
    }
  }

  /**
   * Converts the hexadecimal hashes from the registry to something we can use with
   * the fetch() method: every two characters become one byte, and the bytes
   * are base64 encoded.
   * @param {string} hexstring
   * @returns {string}
   */
  hexToBase64(hexstring) {
    const bytes = hexstring.match(/\w{2}/g).map(pair => String.fromCharCode(parseInt(pair, 16)));
    return btoa(bytes.join(""));
  }

  /**
   * Gives you the list of models needed to translate from one language into
   * the other. Generally this will be the same as you just put in if there
   * is a direct model, but it could return a list of two models if you need
   * to pivot through a third language.
   * Returns just [{from:str,to:str}...]. To be used something like this:
   * ```
   * const models = await this.getModels({from, to});
   * for (const {from, to} of models) {
   *   const buffers = await this.loadTranslationModel({from,to});
   *   [TranslationWorker].loadTranslationModel({from,to}, buffers)
   * }
   * ```
   * @returns {Promise<TranslationModel[]>}
   */
  getModels({from, to}) {
    const key = JSON.stringify({from, to});

    // Note that the `this.models` map stores Promises. This so that
    // multiple calls to `getModels` that ask for the same model will
    // return the same promise, and the actual lookup is only done once.
    // The lookup is async because we need to await `this.registry`
    if (!this.models.has(key))
      this.models.set(key, this.findModels(from, to));

    return this.models.get(key);
  }

  /**
   * Find model (or model pair) to translate from `from` to `to`. A direct
   * model wins; otherwise the first `from -> pivot` and `pivot -> to` models
   * are combined.
   * @param {string} from
   * @param {string} to
   * @returns {Promise<TranslationModel[]>}
   * @throws {Error} when neither a direct model nor a pivot pair exists
   */
  async findModels(from, to) {
    const registry = await this.registry;

    const direct = [];
    const outbound = [];
    const inbound = [];

    for (const model of registry) {
      if (model.from === from && model.to === to)
        direct.push(model);
      else if (model.from === from && model.to === this.pivotLanguage)
        outbound.push(model);
      else if (model.to === to && model.from === this.pivotLanguage)
        inbound.push(model);
    }

    if (direct.length)
      return [direct[0]];

    if (outbound.length && inbound.length)
      return [outbound[0], inbound[0]];

    throw new Error(`No model available to translate from '${from}' to '${to}'`);
  }
}
/**
 * Translator balancing between throughput and latency. Can use multiple worker
 * threads.
 *
 * Requests with the same language pair and priority are grouped into batches
 * of at most `batchSize` requests; each batch is handed to one worker.
 */
export class BatchTranslator {
  /**
   * @param {{
   *  cacheSize?: number,
   *  useNativeIntGemm?: boolean,
   *  workers?: number,
   *  batchSize?: number,
   *  downloadTimeout?: number,
   *  workerUrl?: string,
   *  registryUrl?: string
   *  pivotLanguage?: string?
   *  onerror?: (err: Error)
   * }} options
   * @param {TranslatorBacking} [backing] created from `options` when omitted
   */
  constructor(options, backing) {
    if (!backing)
      backing = new TranslatorBacking(options);

    this.backing = backing;

    /**
     * @type {Array<{idle:Boolean, worker:Worker, exports:Proxy}>} List of
     * active workers (and a flag to mark them idle or not). An entry has no
     * `worker`/`exports` yet while its worker is still loading.
     */
    this.workers = [];

    /**
     * Maximum number of workers
     * @type {number}
     */
    this.workerLimit = Math.max(options?.workers || 0, 1);

    /**
     * List of batches we push() to & shift() from using `enqueue`.
     * @type {Array<{
     *    id: number,
     *    key: string,
     *    priority: number,
     *    models: TranslationModel[],
     *    requests: Array<{
     *      request: TranslationRequest,
     *      resolve: (response: TranslationResponse),
     *      reject: (error: Error)
     *    }>
     * }>}
     */
    this.queue = [];

    /**
     * batch serial to help keep track of batches when debugging
     * @type {Number}
     */
    this.batchSerial = 0;

    /**
     * Number of requests in a batch before it is ready to be translated in
     * a single call. Bigger is better for throughput (better matrix packing)
     * but worse for latency since you'll have to wait for the entire batch
     * to be translated.
     * @type {Number}
     */
    this.batchSize = Math.max(options?.batchSize || 8, 1);

    this.onerror = options?.onerror || (err => console.error('WASM Translation Worker error:', err));
  }

  /**
   * Destructor that stops and cleans up. All queued requests are rejected
   * with a CancelledError.
   */
  async delete() {
    // Empty the queue
    this.remove(() => true);

    // Terminate the workers. Entries whose worker is still loading have no
    // `worker` property yet, skip those.
    this.workers.forEach(({worker}) => worker?.terminate());
  }

  /**
   * Makes sure queued work gets send to a worker. Runs from a timer so
   * batches get a chance to fill up before they are picked up. Will keep
   * calling itself as long as there is work in the queue, but it does not
   * hurt to call it multiple times. This function always returns immediately.
   */
  notify() {
    setTimeout(async () => {
      // Is there work to be done?
      if (!this.queue.length)
        return;

      // Find an idle worker
      let worker = this.workers.find(worker => worker.idle);

      // No worker free, but space for more?
      if (!worker && this.workers.length < this.workerLimit) {
        // Claim a place in the workers array (but mark it busy so
        // it doesn't get used by any other `notify()` calls).
        const placeholder = {idle: false};
        this.workers.push(placeholder);

        try {
          // adds `worker` and `exports` props
          Object.assign(placeholder, await this.backing.loadWorker());

          // At this point we know our new worker will be usable.
          worker = placeholder;
        } catch (e) {
          // Give the claimed slot back. Otherwise the dead placeholder would
          // count against `workerLimit` forever and a later `notify()` could
          // never start a replacement worker. The queue is left untouched so
          // that the next `notify()` can retry.
          const slot = this.workers.indexOf(placeholder);
          if (slot !== -1)
            this.workers.splice(slot, 1);

          this.onerror(new Error(`Could not initialise translation worker: ${e.message}`));
        }
      }

      // If no worker, that's the end of it.
      if (!worker)
        return;

      const batch = this.queue.shift();

      // Loading a worker is awaited above, so another `notify()` run or a
      // `remove()` call may have emptied the queue in the meantime. In that
      // case just make the worker available.
      if (!batch) {
        worker.idle = true;
        return;
      }

      // Put this worker to work, marking as busy
      worker.idle = false;

      try {
        await this.consumeBatch(batch, worker.exports);
      } catch (e) {
        batch.requests.forEach(({reject}) => reject(e));
      }

      worker.idle = true;

      // Is there more work to be done? Do another idleRequest
      if (this.queue.length)
        this.notify();
    });
  }

  /**
   * The only real public call you need!
   * ```
   * const {target: {text:string}} = await this.translate({
   *   from: 'de',
   *   to: 'en',
   *   text: 'Hallo Welt!',
   *   html: false, // optional
   *   priority: 0 // optional, like `nice` lower numbers are translated first
   * })
   * ```
   * @param {TranslationRequest} request
   * @returns {Promise<TranslationResponse>}
   */
  translate(request) {
    const {from, to, priority} = request;

    // The executor is async, but everything in it is wrapped in try/catch so
    // that any failure ends up rejecting the returned promise.
    return new Promise(async (resolve, reject) => {
      try {
        // Batching key: only requests with the same key can be batched
        // together. Think same translation model, same options.
        const key = JSON.stringify({from, to});

        // (Fetching models first because if we would do it between looking
        // for a batch and making a new one, we end up with a race condition.)
        const models = await this.backing.getModels(request);

        // Put the request and its callbacks into a fitting batch
        this.enqueue({key, models, request, resolve, reject, priority});

        // Tell a worker to pick up the work at some point.
        this.notify();
      } catch (e) {
        reject(e);
      }
    });
  }

  /**
   * Prune pending requests by testing each one of them to whether they're
   * still relevant. Used to prune translation requests from tabs that got
   * closed. Removed requests are rejected with a CancelledError; the others
   * are put back into (new) batches.
   * @param {(request:TranslationRequest) => boolean} filter evaluates to true if request should be removed
   */
  remove(filter) {
    const queue = this.queue;

    this.queue = [];

    for (const batch of queue) {
      for (const {request, resolve, reject} of batch.requests) {
        if (filter(request)) {
          // Add error.request property to match response.request for
          // a resolve() callback. Pretty useful if you don't want to
          // do all kinds of Function.bind() dances.
          reject(Object.assign(new CancelledError('removed by filter'), {request}));
          continue;
        }

        this.enqueue({
          key: batch.key,
          priority: batch.priority,
          models: batch.models,
          request,
          resolve,
          reject
        });
      }
    }
  }

  /**
   * Internal function used to put a request in a batch that still has space.
   * Also responsible for keeping the batches in order of priority. Called by
   * `translate()` but also used when filtering pending requests.
   * @param {{request:TranslateRequest, models:TranslationModel[], key:String, priority:Number?, resolve:(TranslateResponse)=>any, reject:(Error)=>any}}
   */
  enqueue({key, models, request, resolve, reject, priority}) {
    if (priority === undefined)
      priority = 0;

    // Find a batch in the queue that we can add to
    // (TODO: can we search backwards? that would speed things up)
    let batch = this.queue.find(batch => {
      return batch.key === key
          && batch.priority === priority
          && batch.requests.length < this.batchSize;
    });

    // No batch or full batch? Queue up a new one
    if (!batch) {
      batch = {id: ++this.batchSerial, key, priority, models, requests: []};
      this.queue.push(batch);
      this.queue.sort((a, b) => a.priority - b.priority);
    }

    batch.requests.push({request, resolve, reject});
  }

  /**
   * Internal method that uses a worker thread to process a batch. You can
   * wait for the batch to be done by awaiting this call. You should only
   * then reuse the worker otherwise you'll just clog up its message queue.
   * @param {Object} batch a batch as stored in `this.queue`
   * @param {Proxy<TranslationWorker>} worker the `exports` of a worker
   */
  async consumeBatch(batch, worker) {
    performance.mark('BergamotBatchTranslator.start');

    // Make sure the worker has all necessary models loaded. If not, tell it
    // first to load them.
    await Promise.all(batch.models.map(async ({from, to}) => {
      if (!await worker.hasTranslationModel({from, to})) {
        const buffers = await this.backing.getTranslationModel({from, to});
        await worker.loadTranslationModel({from, to}, buffers);
      }
    }));

    // Call the worker to translate. Only sending the actually necessary
    // parts of the batch to avoid trying to send things that don't survive
    // the message passing API between this thread and the worker thread.
    const responses = await worker.translate({
      models: batch.models.map(({from, to}) => ({from, to})),
      texts: batch.requests.map(({request: {text, html, qualityScores}}) => ({
        text: text.toString(),
        html: !!html,
        qualityScores: !!qualityScores
      }))
    });

    // Responses are in! Connect them back to their requests and call their
    // callbacks.
    batch.requests.forEach(({request, resolve, reject}, i) => {
      // TODO: look at response.ok and reject() if it is false
      resolve({
        request, // Include request for easy reference? Will allow you
                 // to specify custom properties and use that to link
                 // request & response back to each other.
        ...responses[i] // {target: {text: String}}
      });
    });

    performance.measure('BergamotBatchTranslator', 'BergamotBatchTranslator.start');
  }
}
/**
 * Translator optimised for interactive use.
 *
 * Uses a single worker and keeps at most one pending request: a new call to
 * `translate()` replaces (and rejects) the request that was still waiting.
 */
export class LatencyOptimisedTranslator {
  /**
   * @type {TranslatorBacking}
   */
  backing;

  /**
   * @type {Promise<{idle:boolean, worker:Worker, exports:Proxy<TranslationWorker>}>}
   */
  worker;

  /**
   * @type {{request: TranslationRequest, accept:(TranslationResponse), reject:(Error)} | null}
   */
  pending;

  /**
   * @param {{
   *  cacheSize?: number,
   *  useNativeIntGemm?: boolean,
   *  downloadTimeout?: number,
   *  workerUrl?: string,
   *  registryUrl?: string
   *  pivotLanguage?: string?
   * }} options
   * @param {TranslatorBacking} [backing] created from `options` when omitted
   */
  constructor(options, backing) {
    if (!backing)
      backing = new TranslatorBacking(options);

    this.backing = backing;

    // Exposing the this.loadWorker() returned promise through this.worker
    // so that you can use that to catch any errors that happened during
    // loading.
    this.worker = this.backing.loadWorker().then(worker => ({...worker, idle:true}));
  }

  /**
   * Destructor that stops and cleans up. Never rejects because of a worker
   * that failed to load.
   */
  async delete() {
    // Cancel pending translation
    if (this.pending) {
      this.pending.reject(new CancelledError('translator got deleted'));
      this.pending = null;
    }

    // Terminate the worker (I don't care if this fails)
    try {
      const {worker} = await this.worker;
      worker.terminate();
    } catch {
      // Best effort: a worker that never loaded has nothing to terminate.
    } finally {
      this.worker = null;
    }
  }

  /**
   * Sets `request` as the next translation to process. If there was already
   * a translation waiting to be processed, their promise is rejected with a
   * SupersededError.
   * @param {TranslationRequest} request
   * @param {{signal:AbortSignal?}?} options
   * @return {Promise<TranslationResponse>}
   */
  translate(request, options) {
    if (this.pending)
      this.pending.reject(new SupersededError());

    return new Promise((accept, reject) => {
      const pending = {request, accept, reject, options};

      if (options?.signal) {
        // An abort event is dispatched only once, so the listener can be
        // dropped after it ran.
        options.signal.addEventListener('abort', e => {
          reject(new CancelledError('abort signal'));

          if (this.pending === pending)
            this.pending = null;
        }, {once: true});
      }

      this.pending = pending;
      this.notify();
    });
  }

  /**
   * Schedules processing of the pending request (if any) on a timer. Safe to
   * call multiple times; returns immediately.
   */
  notify() {
    setTimeout(async () => {
      if (!this.pending)
        return;

      let worker;

      // Catch errors such as the worker not working
      try {
        // Possibly wait for the worker to finish loading. After it loaded
        // these calls are pretty much instantaneous.
        worker = await this.worker;

        // Is another notify() call hogging the worker? Then stop.
        if (!worker.idle)
          return;
      } catch (e) {
        // Without a worker the pending request can never be served. Reject
        // it so the caller does not wait forever, then report the error.
        if (this.pending) {
          this.pending.reject(e);
          this.pending = null;
        }

        this.backing.onerror(e);
        return;
      }

      // The pending request may have been claimed or aborted while waiting
      // for the worker.
      if (!this.pending)
        return;

      // Claim the pending translation request.
      const {request, accept, reject, options} = this.pending;
      this.pending = null;

      // Mark the worker as occupied
      worker.idle = false;

      try {
        const models = await this.backing.getModels(request);

        // Make sure the worker has every needed model loaded.
        await Promise.all(models.map(async ({from, to}) => {
          if (!await worker.exports.hasTranslationModel({from, to})) {
            const buffers = await this.backing.getTranslationModel({from, to}, {signal: options?.signal});
            await worker.exports.loadTranslationModel({from, to}, buffers);
          }
        }));

        const {text, html, qualityScores} = request;
        const responses = await worker.exports.translate({
          models: models.map(({from,to}) => ({from, to})),
          texts: [{text, html, qualityScores}]
        });

        accept({request, ...responses[0]});
      } catch (e) {
        reject(e);
      }

      worker.idle = true;

      // Is there more work to be done? Do another idleRequest
      if (this.pending)
        this.notify();
    });
  }
}

Просмотреть файл

@ -1,3 +0,0 @@
{
"type": "commonjs"
}

Просмотреть файл

@ -1,475 +0,0 @@
/**
* Wrapper around the dirty bits of Bergamot's WASM bindings.
*/
// Global because importScripts is global.
var Module = {};
/**
 * node.js compatibility: Fake GlobalWorkerScope that emulates being inside a
 * WebWorker.
 *
 * Only installed when there is no global `self`. It exposes `Module` and a
 * `self` object on `global`, backed by the `parentPort` of
 * node:worker_threads.
 */
if (typeof self === 'undefined') {
  global.Module = Module;

  global.self = new class GlobalWorkerScope {
    /** @type {import("node:worker_threads").MessagePort} */
    #port;

    constructor() {
      const {parentPort} = require(/* webpackIgnore: true */ 'node:worker_threads');
      this.#port = parentPort;
    }

    /**
     * Add event listener to listen for messages posted to the worker. The
     * payload is wrapped as `{data}` before it is passed to `callback`.
     * @param {string} eventName
     * @param {(object)} callback
     */
    addEventListener(eventName, callback) {
      this.#port.on(eventName, (data) => callback({data}));
    }

    /**
     * Post message outside, to the owner of the Worker.
     * @param {any} message
     */
    postMessage(message) {
      this.#port.postMessage(message);
    }

    /**
     * Reads each script relative to this file's directory and evaluates it.
     * @param {...string} scripts - Paths to scripts to import in that order
     */
    importScripts(...scripts) {
      const {readFileSync} = require(/* webpackIgnore: true */ 'node:fs');
      const {join} = require(/* webpackIgnore: true */ 'node:path');

      for (const pathname of scripts) {
        const script = readFileSync(join(__dirname, pathname), {encoding: 'utf-8'});

        // NOTE(review): this evaluates file contents with an indirect eval
        // (presumably so declarations end up in the global scope, like the
        // browser's importScripts). Only pass paths to trusted local files.
        eval.call(global, script);
      }
    }

    /**
     * Adds support for local file urls: a URL object with the `file:`
     * protocol is read from disk and wrapped in a Response. Anything else
     * (including plain strings) is passed on to the global `fetch()`.
     * @param {URL | string} url - file URL object, or anything `fetch()` accepts
     * @param {object?} options - See `fetch()` options
     * @return {Promise<Response>}
     */
    async fetch(url, options) {
      if (url.protocol === 'file:') {
        const {readFile} = require(/* webpackIgnore: true */ 'node:fs/promises');
        const {fileURLToPath} = require(/* webpackIgnore: true */ 'node:url');

        // `url.pathname` is percent-encoded and not a valid path on every
        // platform; `fileURLToPath()` does the proper conversion.
        const buffer = await readFile(fileURLToPath(String(url)));
        const blob = new Blob([buffer]);

        return new Response(blob, {
          status: 200,
          statusText: 'OK',
          headers: {
            'Content-Type': 'application/wasm',
            'Content-Length': blob.size.toString()
          }
        });
      }

      return await fetch(url, options);
    }

    /**
     * URL of this script file.
     * @return {URL}
     */
    get location() {
      const {pathToFileURL} = require(/* webpackIgnore: true */ 'node:url');

      // Escapes characters that are special in URLs and handles
      // platform-specific path syntax, unlike string concatenation.
      return pathToFileURL(__filename);
    }
  }
}
/**
 * Minimal reader/writer for the flat subset of YAML used here: top-level
 * `key: value` pairs and lists of scalars.
 */
class YAML {
  /**
   * Parses YAML into dictionary. Does not interpret types, all values are a
   * string or a list of strings. No support for objects other than the top
   * level. Accepts both LF and CRLF line endings.
   * @param {string} yaml
   * @return {{[string]: string | string[]}}
   * @throws {Error} when a line is neither `key: value`, a list item that
   *   follows a key, nor blank
   */
  static parse(yaml) {
    const out = {};

    // Key that list items on the following lines belong to.
    let key = null;

    yaml.split(/\r?\n/).forEach((line, i) => {
      let match;

      if ((match = line.match(/^\s*-\s+(.+?)$/))) {
        // A list item needs a key to attach to.
        if (key === null)
          throw Error(`Could not parse line ${i+1}: "${line}"`);

        // First item for this key: turn the scalar into a list, keeping a
        // non-empty inline value as its first element.
        if (!Array.isArray(out[key]))
          out[key] = out[key].trim() ? [out[key]] : [];

        out[key].push(match[1].trim());
      }
      else if ((match = line.match(/^\s*([A-Za-z0-9_][A-Za-z0-9_-]*):\s*(.*)$/))) {
        key = match[1];
        out[key] = match[2].trim();
      }
      else if (line.trim()) {
        // Not blank and not recognised.
        throw Error(`Could not parse line ${i+1}: "${line}"`);
      }
    });

    return out;
  }

  /**
   * Turns an object into a YAML string. No support for objects, only simple
   * types and lists of simple types. Values are written unquoted, using
   * their string form.
   * @param {{[string]: string | number | boolean | string[]}} data
   * @return {string}
   */
  static stringify(data) {
    return Object.entries(data).map(([key, value]) => {
      const valstr = Array.isArray(value)
        ? value.map(val => `\n - ${val}`).join('')
        : `${value}`;

      return `${key}: ${valstr}\n`;
    }).join('');
  }
}
/**
 * Wrapper around the bergamot-translator exported module that hides the need
 * of working with C++ style data structures and does model management.
 */
class BergamotTranslatorWorker {
    /**
     * Map of expected symbol -> name of fallback symbol for functions that can
     * be swizzled for a faster implementation. Firefox Nightly makes use of
     * this.
     */
    static GEMM_TO_FALLBACK_FUNCTIONS_MAP = {
        'int8_prepare_a': 'int8PrepareAFallback',
        'int8_prepare_b': 'int8PrepareBFallback',
        'int8_prepare_b_from_transposed': 'int8PrepareBFromTransposedFallback',
        'int8_prepare_b_from_quantized_transposed': 'int8PrepareBFromQuantizedTransposedFallback',
        'int8_prepare_bias': 'int8PrepareBiasFallback',
        'int8_multiply_and_add_bias': 'int8MultiplyAndAddBiasFallback',
        'int8_select_columns_of_b': 'int8SelectColumnsOfBFallback'
    };
    /**
     * Name of module exported by Firefox Nightly that exports an optimised
     * implementation of the symbols mentioned above.
     */
    static NATIVE_INT_GEMM = 'mozIntGemm';
    /**
     * Empty because we can't do async constructors yet. It is the
     * responsibility of whoever owns this WebWorker to call `initialize()`.
     */
    constructor(options) {}
    /**
     * Instantiates a new translation worker with optional options object.
     * If this call succeeds, the WASM runtime is loaded and ready.
     *
     * Available options are:
     *   useNativeIntGemm: {true | false} defaults to false. If true, it will
     *                     attempt to link to the intgemm module available in
     *                     Firefox Nightly which makes translations much faster.
     *          cacheSize: {Number} defaults to 0 which disables translation
     *                     cache entirely. Note that this is a theoretical
     *                     upper bound. In practice it will use about 1/3rd of
     *                     the cache specified here. `2 ** 14` (16384) is not
     *                     a bad starting value.
     * @param {{useNativeIntGemm: boolean, cacheSize: number}} options
     */
    async initialize(options) {
        this.options = options || {};
        this.models = new Map(); // Map<string, TranslationModel>
        this.module = await this.loadModule();
        this.service = await this.loadTranslationService();
    }
    /**
     * Tries to load native IntGEMM module for bergamot-translator. If that
     * fails because it or any of the expected functions is not available, it
     * falls back to using the naive implementations that come with the wasm
     * binary itself through `linkFallbackIntGemm()`.
     * @param {{env: {memory: WebAssembly.Memory}}} info
     * @return {{[method:string]: (...any) => any}}
     */
    linkNativeIntGemm(info) {
        const nativeName = BergamotTranslatorWorker.NATIVE_INT_GEMM;
        if (!WebAssembly[nativeName]) {
            console.warn('Native gemm requested but not available, falling back to embedded gemm');
            return this.linkFallbackIntGemm(info);
        }
        // Called as a member of `WebAssembly` so `this` stays the same as in
        // a direct `WebAssembly.mozIntGemm()` call.
        const instance = new WebAssembly.Instance(WebAssembly[nativeName](), {
            '': {memory: info['env']['memory']}
        });
        const expected = Object.keys(BergamotTranslatorWorker.GEMM_TO_FALLBACK_FUNCTIONS_MAP);
        if (!expected.every(fun => instance.exports[fun])) {
            console.warn('Native gemm is missing expected functions, falling back to embedded gemm');
            return this.linkFallbackIntGemm(info);
        }
        return instance.exports;
    }
    /**
     * Links intgemm functions that are already available in the wasm binary,
     * but just exports them under the name that is expected by
     * bergamot-translator.
     * @param {{env: {memory: WebAssembly.Memory}}} info
     * @return {{[method:string]: (...any) => any}}
     */
    linkFallbackIntGemm(info) {
        const mapping = Object.entries(BergamotTranslatorWorker.GEMM_TO_FALLBACK_FUNCTIONS_MAP).map(([key, name]) => {
            // Looked up lazily at call time: `Module['asm']` is read on each
            // invocation rather than when the mapping is built.
            return [key, (...args) => Module['asm'][name](...args)];
        });
        return Object.fromEntries(mapping);
    }
    /**
     * Internal method. Reads and instantiates the WASM binary. Returns a
     * promise for the exported Module object that contains all the classes
     * and functions exported by bergamot-translator. The promise rejects if
     * fetching, instantiating or loading the glue script fails.
     * @return {Promise<BergamotTranslator>}
     */
    async loadModule() {
        const response = await self.fetch(new URL('./bergamot-translator-worker.wasm', self.location));
        return new Promise((resolve, reject) => {
            Object.assign(Module, {
                instantiateWasm: (info, accept) => {
                    try {
                        WebAssembly.instantiateStreaming(response, {
                            ...info,
                            'wasm_gemm': this.options.useNativeIntGemm
                                ? this.linkNativeIntGemm(info)
                                : this.linkFallbackIntGemm(info)
                        }).then(({instance}) => accept(instance)).catch(reject);
                    } catch (err) {
                        reject(err);
                    }
                    return {};
                },
                onRuntimeInitialized: () => {
                    resolve(Module);
                }
            });
            // Emscripten glue code. Webpack et al. should not mangle the `Module` property name!
            self.Module = Module;
            // A synchronous throw here rejects the promise via the executor.
            self.importScripts('bergamot-translator-worker.js');
        });
    }
    /**
     * Internal method. Instantiates a BlockingService()
     * @return {BergamotTranslator.BlockingService}
     */
    loadTranslationService() {
        return new this.module.BlockingService({
            // Negative or missing cache sizes are clamped to 0 (cache disabled).
            cacheSize: Math.max(this.options.cacheSize || 0, 0)
        });
    }
    /**
     * Returns whether a model has already been loaded in this worker. The
     * method itself is synchronous; the message handler that dispatches to
     * it awaits whatever is returned.
     * @param {{from:string, to:string}}
     * @return boolean
     */
    hasTranslationModel({from,to}) {
        const key = JSON.stringify({from,to});
        return this.models.has(key);
    }
    /**
     * Loads a translation model from a set of file buffers. After this, the
     * model is available to translate with and `hasTranslationModel()` will
     * return true for this pair. If a model was already loaded for the same
     * pair it is replaced and the old one is freed.
     * @param {{from:string, to:string}}
     * @param {{
     *   model: ArrayBuffer,
     *   shortlist: ArrayBuffer,
     *   vocabs: ArrayBuffer[],
     *   qualityModel: ArrayBuffer?,
     *   config?: {
     *     [key:string]: string
     *   }
     * }} buffers
     */
    loadTranslationModel({from, to}, buffers) {
        // Each distinct vocab buffer may only be passed once. This because
        // service_bindings.cpp:prepareVocabsSmartMemories :(
        const uniqueVocabs = [...new Set(buffers.vocabs)];
        const modelMemory = this.prepareAlignedMemoryFromBuffer(buffers.model, 256);
        const shortlistMemory = this.prepareAlignedMemoryFromBuffer(buffers.shortlist, 64);
        const qualityModel = buffers.qualityModel // optional quality model
            ? this.prepareAlignedMemoryFromBuffer(buffers.qualityModel, 64)
            : null;
        const vocabs = new this.module.AlignedMemoryList();
        uniqueVocabs.forEach(vocab => {
            vocabs.push_back(this.prepareAlignedMemoryFromBuffer(vocab, 64));
        });
        // Defaults
        let modelConfig = YAML.parse(`
            beam-size: 1
            normalize: 1.0
            word-penalty: 0
            cpu-threads: 0
            gemm-precision: int8shiftAlphaAll
            skip-cost: true
        `);
        if (buffers.config)
            Object.assign(modelConfig, buffers.config);
        // WASM marian is only compiled with support for shiftedAll.
        if (modelConfig['gemm-precision'] === 'int8')
            modelConfig['gemm-precision'] = 'int8shiftAll';
        // Override these
        Object.assign(modelConfig, YAML.parse(`
            alignment: soft
            quiet: true
            quiet-translation: true
            max-length-break: 128
            mini-batch-words: 1024
            workspace: 128
            max-length-factor: 2.0
        `));
        const key = JSON.stringify({from,to});
        const model = new this.module.TranslationModel(YAML.stringify(modelConfig), modelMemory, shortlistMemory, vocabs, qualityModel);
        // WASM objects are not garbage collected: free a model that is being
        // replaced, but only after the new one was constructed successfully.
        const previous = this.models.get(key);
        this.models.set(key, model);
        if (previous)
            previous.delete();
    }
    /**
     * Frees up memory used by old translation model. Does nothing if model is
     * already deleted.
     * @param {{from:string, to:string}}
     */
    freeTranslationModel({from, to}) {
        const key = JSON.stringify({from,to});
        if (!this.models.has(key))
            return;
        const model = this.models.get(key);
        this.models.delete(key);
        model.delete();
    }
    /**
     * Internal function. Copies the data from an ArrayBuffer into memory that
     * can be used inside the WASM vm by Marian.
     * @param {{ArrayBuffer}} buffer
     * @param {number} alignmentSize
     * @return {BergamotTranslator.AlignedMemory}
     */
    prepareAlignedMemoryFromBuffer(buffer, alignmentSize) {
        const bytes = new Int8Array(buffer);
        const memory = new this.module.AlignedMemory(bytes.byteLength, alignmentSize);
        memory.getByteArrayView().set(bytes);
        return memory;
    }
    /**
     * Public. Does actual translation work. You have to make sure that the
     * models necessary for translating text are already loaded before calling
     * this method. Returns the translation responses; the message handler
     * wraps them in a promise. `html` and `qualityScores` default to false
     * when omitted.
     * @param {{models: {from:string, to:string}[], texts: {text: string, html: boolean, qualityScores?: boolean}[]}}
     * @return {{target: {text: string}}[]}
     * @throws {Error} if one of the requested models has not been loaded.
     */
    translate({models, texts}) {
        // Turn our model names into a list of TranslationModel pointers. Done
        // before allocating anything so a missing model cannot leak memory.
        const translationModels = models.map(({from,to}) => {
            const model = this.models.get(JSON.stringify({from,to}));
            if (!model)
                throw new Error(`No translation model loaded for ${from} -> ${to}`);
            return model;
        });
        // std::vector<std::string> with the texts, and a matching
        // std::vector<ResponseOptions> with one entry per text.
        const input = new this.module.VectorString();
        const options = new this.module.VectorResponseOptions();
        let responses = null;
        try {
            texts.forEach(({text, html = false, qualityScores = false}) => {
                input.push_back(text);
                options.push_back({alignment: false, html, qualityScores});
            });
            // translate the input, which is a vector<String>; the result is a vector<Response>
            responses = models.length > 1
                ? this.service.translateViaPivoting(...translationModels, input, options)
                : this.service.translate(...translationModels, input, options);
            // Convert the Response WASM wrappers into native JavaScript types we
            // can send over the 'wire' (message passing) in the same format as we
            // use in bergamot-translator.
            return texts.map((_, i) => ({
                target: {
                    text: responses.get(i).getTranslatedText()
                }
            }));
        } finally {
            // WASM-side objects have to be released manually, also on failure.
            input.delete();
            options.delete();
            if (responses)
                responses.delete();
        }
    }
}
/**
 * Because you can't put an Error object in a message. But you can post a
 * generic object!
 *
 * Anything can be thrown in JavaScript, so primitives (strings, numbers,
 * `null`, `undefined`) are converted into an error-shaped object whose
 * message is the stringified value instead of losing the information.
 * @param {*} error the thrown value, usually an Error
 * @return {{
 *  name: string?,
 *  message: string?,
 *  stack: string?
 * }}
 */
function cloneError(error) {
    const isObjectLike = error !== null
        && (typeof error === 'object' || typeof error === 'function');
    if (!isObjectLike) {
        return {
            name: 'Error',
            message: String(error),
            stack: undefined
        };
    }
    return {
        name: error.name,
        message: error.message,
        stack: error.stack
    };
}
// The worker instance is created eagerly, but it is only usable once the
// owner of this WebWorker has sent an `initialize` call through the message
// interface below.
const worker = new BergamotTranslatorWorker();
// Message protocol: every request is `{id, name, args}`; the method `name` is
// invoked on the worker with `args`, and the reply is `{id, result}` on
// success or `{id, error}` (a cloneable error description) on failure.
self.addEventListener('message', async function(event) {
    const {data: {id, name, args}} = event;
    if (!id)
        console.error('Received message without id', event);
    try {
        const method = worker[name];
        if (typeof method !== 'function')
            throw TypeError(`worker[${name}] is not a function`);
        // `Promise.resolve` lets us await both plain return values and
        // promises returned by the invoked method.
        const pending = Promise.resolve(Reflect.apply(method, worker, args));
        const result = await pending;
        self.postMessage({id, result});
    } catch (error) {
        const reply = {id, error: cloneError(error)};
        self.postMessage(reply);
    }
});

Просмотреть файл

@ -1,175 +0,0 @@
#!/usr/bin/env node
/**
 * A note upfront: the bergamot-translator API is pretty low level, and
 * embedding it successfully requires some knowledge about the WebWorkers and
 * WebAssembly APIs. This script tries to demonstrate the bergamot-translator
 * API with as little of that boilerplate code as possible.
 * See the wasm/test_page code for a fully fleshed out demo in a web context.
 */
// For node we use the fs module to read local files. In a web context you can
// use `fetch()` for everything.
const fs = require('fs');
// Read wasm binary into a buffer, which will be loaded by
// bergamot-translator-worker.js in a minute. In a web context, you'd be using
// `fetch(...).then(response => response.blob())` for this, but Node does not
// implement `fetch("file://...")` yet.
// Note: both paths are relative to the current working directory.
const wasmBinary = fs.readFileSync('./bergamot-translator-worker.wasm');
// Read wasm runtime code that bridges the bergamot-translator binary with JS.
const wasmRuntime = fs.readFileSync('./bergamot-translator-worker.js', {encoding: 'utf8'});
// Initialise the `Module` object. By adding methods and options to this, we can
// affect how bergamot-translator interacts with JavaScript. See
// https://emscripten.org/docs/api_reference/module.html for all available
// options. It is important that this object is initialised in the same scope
// but before `bergamot-translator-worker.js` is executed. Once that script
// executes, it defines the exported methods as properties of this Module
// object.
// `onRuntimeInitialized` is a function declaration further down in this file;
// it is hoisted, which is why it can be referenced here already.
global.Module = {
wasmBinary,
onRuntimeInitialized
};
// Execute bergamot-translator-worker.js in this scope. This will also,
// indirectly, call the onRuntimeInitialized function defined below and
// referenced in the `Module` object above.
// NOTE(review): `eval` runs whatever is in the local glue file with full
// privileges; only do this with a build artifact you trust.
eval.call(global, wasmRuntime);
/**
 * Called from inside the bergamot-translator-worker.js script once the wasm
 * module is initialized. At this point that `Module` object that was
 * initialised above will have all the classes defined in the
 * bergamot-translator API available on it.
 *
 * Downloads an English -> German model, translates a batch of two example
 * texts and prints the translations to the console.
 * @return {Promise<void>} rejects if a model file cannot be downloaded or if
 *   translation fails.
 */
async function onRuntimeInitialized() {
  // Root url for our models for now.
  const root = 'https://storage.googleapis.com/bergamot-models-sandbox/0.3.1';
  // Urls of data files necessary to create a translation model for
  // English -> German. Note: list is in order of TranslationModel's arguments.
  // The `alignment` value is used later on to load each part of the model with
  // the correct alignment.
  const files = [
    // Neural network and weights:
    {url: `${root}/ende/model.ende.intgemm.alphas.bin`, alignment: 256},
    // Lexical shortlist which is mainly a speed improvement method, not
    // strictly necessary:
    {url: `${root}/ende/lex.50.50.ende.s2t.bin`, alignment: 64},
    // Vocabulary, maps the input and output nodes of the neural network to
    // strings. Note: "deen" may look the wrong way around but vocab is the same
    // between de->en and en->de models.
    {url: `${root}/ende/vocab.deen.spm`, alignment: 64},
  ];
  // Download model data and load it into aligned memory. AlignedMemory is a
  // necessary wrapper around allocated memory inside the WASM environment.
  // The value of `alignment` is specific for which part of the model we're
  // loading. See https://en.wikipedia.org/wiki/Data_structure_alignment for a
  // more general explanation.
  const [modelMem, shortlistMem, vocabMem] = await Promise.all(files.map(async (file) => {
    const response = await fetch(file.url);
    // Without this check an HTTP error page would be loaded as model data.
    if (!response.ok)
      throw new Error(`Could not download ${file.url}: ${response.status} ${response.statusText}`);
    const blob = await response.blob();
    const buffer = await blob.arrayBuffer();
    const bytes = new Int8Array(buffer);
    const memory = new Module.AlignedMemory(bytes.byteLength, file.alignment);
    memory.getByteArrayView().set(bytes);
    return memory;
  }));
  // Set up translation service. This service translates a batch of text per
  // call. The larger the batch, the faster the translation (in words per
  // second) happens, but the longer you have to wait for all of them to finish.
  // The constructor expects an object with options, but only one option is
  // currently supported: `cacheSize`. Setting this to `0` disables the
  // translation cache.
  // **Note**: cacheSize is the theoretical maximum number of sentences that
  // will be cached. In practice, about 1/3 of that will actually be used.
  // See https://github.com/XapaJIaMnu/translateLocally/pull/75
  const service = new Module.BlockingService({cacheSize: 0});
  // Put vocab into its own std::vector<AlignedMemory>. Most models for the
  // Bergamot project only have one vocabulary that is shared by both the input
  // and output side of the translator. But in theory, you could have one for
  // the input side and a different one for the output side. Hence: a list.
  const vocabs = new Module.AlignedMemoryList();
  vocabs.push_back(vocabMem);
  // Config yaml (split as array to allow for indentation without adding tabs
  // or spaces to the strings themselves.)
  // See https://marian-nmt.github.io/docs/cmd/marian-decoder/ for the meaning
  // of most of these options and what other options might be available.
  const config = [
    'beam-size: 1',
    'normalize: 1.0',
    'word-penalty: 0',
    'alignment: soft', // is necessary if you want to use HTML at any point
    'max-length-break: 128',
    'mini-batch-words: 1024',
    'workspace: 128',
    'max-length-factor: 2.0',
    'skip-cost: true',
    'gemm-precision: int8shiftAll', // is necessary for speed and compatibility with Mozilla's models.
  ].join('\n');
  // Set up the model with config yaml and AlignedMemory objects. Optionally a
  // quality estimation model can also be loaded but this is not demonstrated
  // here. Generally you don't need it, and many models don't include the data
  // file necessary to use it anyway.
  const model = new Module.TranslationModel(config, modelMem, shortlistMem, vocabs, /*qualityModel=*/ null);
  // Construct std::vector<std::string> inputs and the matching
  // std::vector<ResponseOptions>. `output` is filled in by translate().
  const input = new Module.VectorString();
  const options = new Module.VectorResponseOptions();
  let output = null;
  try {
    // This is our batch!
    input.push_back('<p>Hello world! Let us write a second sentence.</p> &amp; <p>Goodbye World!</p>');
    input.push_back('This is a second example without HTML & entities.');
    // One ResponseOptions entry per input. Note that all these three
    // properties of your ResponseOptions object need to be specified for each
    // entry.
    // `qualityScores`: related to quality models not explained here. Set this
    //   to `false`.
    // `alignment`: computes alignment scores that maps parts of the input text
    //   to parts of the output text. There is currently no way to get these
    //   mappings out through the JavaScript API so I suggest you set this to
    //   `false` as well.
    // `html`: is the input HTML? If so, the HTML will be parsed and the markup
    //   will be copied back into the translated output. Note: HTML has to be
    //   valid HTML5, with proper closing tags and everything since the HTML
    //   parser built into bergamot-translator does no error correction. Output
    //   of e.g. `Element.innerHTML` meets this criterion.
    options.push_back({qualityScores: false, alignment: false, html: true});
    options.push_back({qualityScores: false, alignment: false, html: false});
    // Size of `input` and `options` has to match.
    console.assert(input.size() === options.size());
    // Translate our batch of 2 requests. Output will be another vector of type
    // `std::vector<Response>`.
    output = service.translate(model, input, options);
    // Number of outputs is number of inputs.
    console.assert(input.size() === output.size());
    for (let i = 0; i < output.size(); ++i) {
      // Get output from std::vector<Response>.
      const translation = output.get(i).getTranslatedText();
      // Print raw translation for inspection.
      console.log(translation);
    }
  } finally {
    // Clean-up: unlike the objects in JavaScript, the objects in the WASM
    // environment are not automatically cleaned up when they're no longer
    // referenced. That is why we manually have to call `delete()` on them
    // when we're done with them, also when translation failed half-way.
    input.delete();
    options.delete();
    if (output)
      output.delete();
  }
}

Просмотреть файл

@ -1,93 +0,0 @@
const http = require('http');
const https = require('https')
const express = require('express');
const app = express();
const server = http.createServer(app);
const fs = require('fs');
const url = require('url');
const nocache = require('nocache');
const cors = require('cors');
const path = require('path');
// Command line: node bergamot-httpserver.js [port] [skipssl] [certpath]
// Arguments arrive as strings, which is why `skipssl` is compared with a
// loose `!= 1` below.
let port = 8000;
if (process.argv[2]) {
  port = process.argv[2];
}
let skipssl = 0;
if (process.argv[3]) {
  skipssl = process.argv[3];
}
let certpath = "/etc/letsencrypt";
if (process.argv[4]) {
  certpath = process.argv[4];
}
app.use(cors())
app.use(nocache());
// Landing page. Plain-HTTP requests are redirected to HTTPS unless SSL is
// skipped.
app.get('/', cors(), function(req, res) {
  if (!req.secure && skipssl != 1) {
    return res.redirect("https://" + req.headers.host + req.url);
  }
  // Set the cross-origin isolation headers before handing the response over
  // to sendFile(), so they do not depend on when sendFile() flushes headers.
  res.header('Cross-Origin-Embedder-Policy','require-corp');
  res.header('Cross-Origin-Opener-Policy','same-origin');
  res.header('Cross-Origin-Resource-Policy','same-origin');
  res.sendFile(path.join(__dirname, 'index.html'));
});
// Any path containing a dot is treated as a static file request.
app.get('/*.*' , cors(), function(req, res) {
  var options = url.parse(req.url, true);
  var mime = Helper.getMime(options);
  serveFile(res, options.pathname, mime);
});
/**
 * Sends a file from the directory of this script as the response, with the
 * cross-origin isolation headers set.
 *
 * Requests that resolve to a location outside of the script directory (e.g.
 * through `..` segments) are rejected with 403, missing files yield 404 and
 * any other read error yields 500.
 * @param {object} res response object (`header`, `writeHead` and `end` are used)
 * @param {string} pathName request path, relative to the script directory
 * @param {string} [mime] Content-Type to send, defaults to 'text/html'
 */
function serveFile(res, pathName, mime) {
  const contentType = mime || 'text/html';
  const root = path.resolve(__dirname);
  // path.join() normalises `..` segments, so the containment check below
  // sees the location that would really be read.
  const filePath = path.join(root, pathName);
  const relative = path.relative(root, filePath);
  const escapesRoot = relative === '..'
    || relative.startsWith('..' + path.sep)
    || path.isAbsolute(relative);
  if (escapesRoot) {
    res.writeHead(403, {"Content-Type": "text/plain"});
    return res.end('Forbidden: ' + pathName);
  }
  fs.readFile(filePath, function (err, data) {
    if (err) {
      const status = err.code === 'ENOENT' ? 404 : 500;
      res.writeHead(status, {"Content-Type": "text/plain"});
      return res.end('Error loading ' + pathName + " with Error: " + err);
    }
    res.header('Cross-Origin-Embedder-Policy','require-corp');
    res.header('Cross-Origin-Opener-Policy','same-origin');
    res.header('Cross-Origin-Resource-Policy','same-origin');
    res.writeHead(200, {"Content-Type": contentType});
    res.end(data);
  });
}
// Unless SSL is skipped (second CLI argument equals 1; loose comparison
// because CLI arguments are strings), also serve the app over HTTPS on port
// 443. The key and certificate files are read synchronously from `certpath`,
// so start-up fails if they are missing.
if (skipssl != 1){
https.createServer({
key: fs.readFileSync(`${certpath}/privkey.pem`),
cert: fs.readFileSync(`${certpath}/cert.pem`),
ca: fs.readFileSync(`${certpath}/chain.pem`),
},
app
).listen(443, () => {
console.log('Listening https port 443')
})
}
/**
 * Helpers for mapping a request path to a Content-Type.
 */
const Helper = {
  // Known file extensions (lower case, without dot) -> MIME type.
  types: {
    "wasm" : "application/wasm"
    , "js" : "application/javascript"
    , "html" : "text/html"
    , "htm" : "text/html"
    , "ico" : "image/vnd.microsoft.icon"
    , "css" : "text/css"
  },
  /**
   * Returns the MIME type for a parsed url, based on its file extension.
   * Unknown extensions map to 'application/octet-stream'.
   * @param {{pathname: string}} u
   * @return {string}
   */
  getMime: function(u) {
    var ext = this.getExt(u.pathname).replace('.', '').toLowerCase();
    // Own-property check: a plain lookup would also find inherited members
    // such as `constructor` or `toString` and return a function.
    return Object.prototype.hasOwnProperty.call(this.types, ext)
      ? this.types[ext]
      : 'application/octet-stream';
  },
  /**
   * Returns the extension (including the leading dot) of the last segment
   * of a path, or '' if that segment has none.
   * @param {string} path
   * @return {string}
   */
  getExt: function(path) {
    // Only look at the file name so a dot in a directory name is ignored.
    var name = path.slice(path.lastIndexOf('/') + 1);
    var i = name.lastIndexOf('.');
    return (i < 0) ? '' : name.slice(i);
  }
};
// Start the plain HTTP server on the configured port (default 8000, or the
// first CLI argument).
server.listen(port);
console.log(`HTTP and BinaryJS server started on port ${port}`);

Просмотреть файл

@ -1,168 +0,0 @@
/* ---- Base ---- */
* {
  box-sizing: border-box;
}
html,
body {
  height: 100%;
  margin: 0;
  font-size: 18px;
  font-family: Optima, Helvetica, Arial;
}
body {
  padding: 1rem;
}
/* Make sure the `hidden` attribute wins over display rules further down. */
[hidden] {
  display: none;
}
/* ---- Layout: "from" panel, swap button, "to" panel, credits below ---- */
.app {
  padding: 1rem;
  display: grid;
  grid: "from swap to" auto "credits credits credits" min-content / 1fr auto 1fr;
  grid-gap: 1rem;
  overflow: hidden;
  min-height: 100%;
  max-width: 1024px;
  margin: 0 auto;
}
/* The swap button has no text of its own, the arrow is generated content. */
.swap::before {
  display: inline-block;
  content: '↔️';
}
/* Narrow screens: stack everything vertically and rotate the swap arrow. */
@media screen and (max-width: 640px) {
  .app {
    grid: "from from" auto "swap swap" auto "to to" auto "credits credits" auto / 1fr;
  }
  .swap::before {
    content: '↕️';
  }
}
.panel {
  display: grid;
  grid-template-rows: auto 1fr;
  grid-gap: 1rem;
  max-height: 100%;
  overflow: hidden;
}
label {
  padding: 0 0.5em;
  display: flex;
  align-items: center;
}
.lang-select {
  padding: 0.25rem 0.5rem;
  margin-left: 1rem;
  background: #f4f4f4;
  font-size: 0.9rem;
  border: 1px solid #ccc;
  border-radius: 0.25rem;
  cursor: pointer;
}
.panel--from {
  grid-area: from;
}
.panel--to {
  grid-area: to;
}
.swap {
  align-self: center;
  grid-area: swap;
  font-size: 1.1rem;
}
.credits {
  grid-area: credits;
}
.credits img {
  float: left;
  margin: 1em 0;
}
/* ---- Input and output areas ---- */
textarea, [contenteditable], .output-area {
  padding: 1rem;
  font-family: sans-serif;
  font-size: 1rem;
  resize: none;
  border-radius: 2px;
  border: 1px solid #ccc;
  min-height: 100px;
  max-height: 100%;
  overflow: auto;
}
button {
  cursor: pointer;
  border: 1px solid #88c;
  border-radius: 4px;
  background: #eef;
  padding: 0.25rem 0.5rem;
}
button:hover {
  background: #cce;
}
/* Positioned so the status text (::after, below) can be anchored to it. */
#output {
  background-color: #f4f4f4;
  position: relative;
}
/* ---- Quality estimation markers ---- */
/* Red squiggly underline, built from four small repeating gradients. */
.output-area [x-bergamot-word-score].bad {
  background-image:
    linear-gradient(45deg, transparent 65%, red 80%, transparent 90%),
    linear-gradient(135deg, transparent 5%, red 15%, transparent 25%),
    linear-gradient(135deg, transparent 45%, red 55%, transparent 65%),
    linear-gradient(45deg, transparent 25%, red 35%, transparent 50%);
  background-repeat:repeat-x;
  background-size: 8px 2px;
  background-position:0 95%;
}
.output-area [x-bergamot-sentence-score].bad {
  background: rgba(255, 128, 128, 0.8);
}
.output-area [x-bergamot-sentence-index].highlight-sentence {
  background: rgba(255, 255, 128, 0.8);
}
/* ---- Status indicators ---- */
.app.translating #output::after {
  position: absolute;
  bottom: 4px;
  right: 4px;
  content: 'Translating…';
}
/* Loading indicator takes priority, so below the .translating selector */
.app.loading #output::after {
  position: absolute;
  bottom: 4px;
  right: 4px;
  content: 'Loading translation model…';
}
/* Positioning context for the full-size #unsupported-browser overlay. */
.app {
  position: relative;
}
#unsupported-browser {
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  background: white;
}

Просмотреть файл

@ -1,41 +0,0 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <!-- Declare the encoding before any other content in the head. -->
    <meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
    <title>Bergamot Translations</title>
    <link rel="stylesheet" href="css/index.css" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1.0, viewport-fit=cover"
    />
  </head>
  <body>
    <div class="app">
      <div class="panel panel--from">
        <label>
          From
          <select id="lang-from" name="from" class="lang-select"></select>
        </label>
        <div id="input" contenteditable="true"></div>
      </div>
      <!-- The visible arrow is CSS generated content, so the button needs an
           explicit accessible name. -->
      <button class="swap" title="swap" aria-label="Swap languages"></button>
      <div class="panel panel--to">
        <label>
          To
          <select id="lang-to" name="to" class="lang-select"></select>
        </label>
        <div id="output" class="output-area"></div>
      </div>
      <!-- Shown by js/index.js when loading the WASM binary fails with a
           CompileError. -->
      <div id="unsupported-browser" hidden>
        <p>Your CPU or browser is not able to run Bergamot translator.</p>
        <p>Try using Firefox or a Chromium based browser with <a href="https://webassembly.org/roadmap/">Fixed-width SIMD support</a>.</p>
        <p>If you already are, you might be using a CPU that does not have support for SSE4.1 instructions.</p>
      </div>
      <footer class="credits">
        <img src="logos.png" alt="Logos of the OPUS project, the Bergamot project and the European Union.">
        <p>This project has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No 825303.</p>
      </footer>
    </div>
    <script type="module" src="js/index.js"></script>
  </body>
</html>

Просмотреть файл

@ -1,215 +0,0 @@
import {LatencyOptimisedTranslator, TranslatorBacking, CancelledError, SupersededError} from '../node_modules/@browsermt/bergamot-translator/translator.js';
/**
 * Shorthand for `document.querySelector`: first element matching `selector`.
 */
function $(selector) {
  const firstMatch = document.querySelector(selector);
  return firstMatch;
}
/**
 * Shorthand for `document.querySelectorAll`: all elements matching `selector`.
 */
function $$(selector) {
  const allMatches = document.querySelectorAll(selector);
  return allMatches;
}
/**
 * Escapes `text` for use in HTML by letting the DOM serialise a text node
 * that is placed inside a detached <div>.
 */
function encodeHTML(text) {
  const container = document.createElement('div');
  const textNode = document.createTextNode(text);
  container.appendChild(textNode);
  const {innerHTML: escaped} = container;
  return escaped;
}
/**
 * Marks sentences and words in the output whose quality score is below the
 * threshold with the `bad` class, and gives every scored word a tooltip with
 * its sentence and word score.
 */
function addQualityIndicators() {
  // The threshold is ln(0.5) (https://github.com/browsermt/bergamot-translator/pull/370#issuecomment-1058123399)
  const scoreOf = (el, attribute) => parseFloat(el.getAttribute(attribute));

  for (const sentenceEl of $$('#output [x-bergamot-sentence-score]')) {
    const isBad = scoreOf(sentenceEl, 'x-bergamot-sentence-score') < Math.log(0.5);
    sentenceEl.classList.toggle('bad', isBad);
  }

  for (const wordEl of $$('#output [x-bergamot-word-score]')) {
    const isBad = scoreOf(wordEl, 'x-bergamot-word-score') < Math.log(0.5);
    wordEl.classList.toggle('bad', isBad);
  }

  // Add tooltips to each (sub)word with sentence and word score. The scores
  // are log-probabilities, hence the Math.exp() for display.
  for (const wordEl of $$('#output [x-bergamot-sentence-score] > [x-bergamot-word-score]')) {
    const sentenceScore = scoreOf(wordEl.parentNode, 'x-bergamot-sentence-score');
    const wordScore = scoreOf(wordEl, 'x-bergamot-word-score');
    const sentenceText = Math.exp(sentenceScore).toFixed(2);
    const wordText = Math.exp(wordScore).toFixed(2);
    wordEl.title = `Sentence: ${sentenceText} Word: ${wordText}`;
  }
}
/**
 * Highlights all output fragments that belong to the same sentence as the
 * parent of `element`. If the parent carries no sentence index, every
 * highlight is cleared.
 */
function highlightSentence(element) {
  const indexAttribute = 'x-bergamot-sentence-index';
  const parent = element.parentNode;
  let sentence = null;
  if (parent.hasAttribute(indexAttribute)) {
    sentence = parent.getAttribute(indexAttribute);
  }
  for (const fragment of $$('#output font[x-bergamot-sentence-index]')) {
    const sameSentence = fragment.getAttribute(indexAttribute) === sentence;
    fragment.classList.toggle('highlight-sentence', sameSentence);
  }
}
/**
 * Very minimal WYSIWYG editor: only keyboard shortcuts (bold, italic,
 * underline) for whoever knows them, no toolbar.
 */
class Editor {
  /**
   * @param {Element} root editable element that receives the shortcuts
   */
  constructor(root) {
    // On Apple platforms shortcuts use the Command key, elsewhere Control.
    this.isApple = window.navigator.platform.startsWith('Mac');
    this.root = root;
    const handler = this.onkeydown.bind(this);
    this.root.addEventListener('keydown', handler);
    // Shortcut key -> document.execCommand() command name.
    this.mapping = {
      "b": "bold",
      "i": "italic",
      "u": "underline",
    };
  }
  /**
   * Applies the formatting command for a known shortcut and suppresses the
   * browser's default handling of that key combination.
   * @param {KeyboardEvent} event
   */
  onkeydown(event) {
    const modifierHeld = this.isApple ? event.metaKey : event.ctrlKey;
    if (!modifierHeld || !(event.key in this.mapping))
      return;
    const command = this.mapping[event.key];
    document.execCommand(command, false, null);
    event.preventDefault();
  }
}
/**
 * Wires up the translation page: fills the language dropdowns from the model
 * registry, creates the translator and (re)translates the input whenever the
 * text or the selected language pair changes.
 */
async function main() {
  const options = {
    // 8192 entries. Note that `2^13` would be bitwise XOR in JavaScript
    // (which evaluates to 15), not a power of two.
    cacheSize: 2 ** 13,
    downloadTimeout: null // Disable timeout
  };
  const backing = new TranslatorBacking(options);
  let pending = 0; // Number of pending requests
  // Patch the fetch() function to track number of pending requests
  backing.fetch = async function(...args) {
    try {
      $('.app').classList.toggle('loading', ++pending > 0);
      return await TranslatorBacking.prototype.fetch.call(backing, ...args);
    } finally {
      $('.app').classList.toggle('loading', --pending > 0);
    }
  };
  // Wait for the language model registry to load. Once it is loaded, use
  // it to fill the "from" and "to" language selection dropdowns.
  await backing.registry.then(models => {
    const names = new Intl.DisplayNames(['en'], {type: 'language'});
    ['from', 'to'].forEach(field => {
      const languages = new Set(models.map(model => model[field]));
      const select = $(`#lang-${field}`);
      const pairs = Array.from(languages, code => ({code, name: names.of(code)}));
      pairs.sort(({name: a}, {name: b}) => a.localeCompare(b));
      pairs.forEach(({name, code}) => {
        select.add(new Option(name, code));
      })
    });
    $('#lang-from').value = 'en';
    $('#lang-to').value = 'es';
  });
  // Intentionally do this after querying backing.registry to make sure that
  // that request is fired off first. Now we can start thinking about loading
  // the WASM binary etc.
  const translator = new LatencyOptimisedTranslator(options, backing);
  let abortController = new AbortController();
  const translate = async () => {
    try {
      const from = $('#lang-from').value;
      const to = $('#lang-to').value;
      // Querying models to see whether quality estimation is supported by all
      // of them.
      const models = await backing.getModels({from, to});
      const qualityScores = models.every(model => 'qualityModel' in model.files);
      $('.app').classList.add('translating');
      const response = await translator.translate({
        from,
        to,
        text: $('#input').innerHTML,
        html: true,
        qualityScores
      }, {signal: abortController.signal});
      $('#output').innerHTML = response.target.text;
      $('#output').classList.toggle('has-quality-scores', qualityScores);
      if (qualityScores)
        addQualityIndicators();
    } catch (error) {
      // Ignore 'errors' caused by typing too fast or by changing the language
      // pair while a translation was still in progress or being loaded (the
      // latter triggers abort()).
      if (error.constructor === SupersededError || error.constructor === CancelledError)
        return;
      // Ignore errors caused by selecting a bad pair (e.g. en -> en)
      if (error.message.startsWith('No model available to translate from'))
        return;
      alert(`Error during translation: ${error}\n\n${error.stack}`);
    } finally {
      // Racing against an already resolved promise tells us whether the
      // worker has finished loading yet without waiting for it.
      const worker = await Promise.race([translator.worker, Promise.resolve(null)]);
      $('.app').classList.toggle('translating', worker === null || !worker.idle);
    }
  }
  const reset = async () => {
    // Cancel any pending loading/translation
    abortController.abort();
    // Reset abort controller to a fresh un-aborted one
    abortController = new AbortController();
    // Clear output to make it more clear something is happening
    $('#output').innerHTML = '';
    // Immediately start loading the new selection
    translate();
  }
  $('button.swap').addEventListener('click', () => {
    const tmp = $('#lang-from').value;
    $('#lang-from').value = $('#lang-to').value;
    $('#lang-to').value = tmp;
    translate();
  })
  // Simple WYSIWYG controls. The instance only needs to exist: it registers
  // its own keydown listener on the input element.
  new Editor($('#input'));
  // Translate on any change
  $('#input').addEventListener('input', translate);
  $('#lang-from').addEventListener('input', reset);
  $('#lang-to').addEventListener('input', reset);
  // Hook up sentence boundary highlighting if that information is available.
  $('#output').addEventListener('mouseover', (e) => highlightSentence(e.target))
  // Wait for bergamot-translator to load. This could throw a CompileError
  // which we want to catch so we can show "oh noes browser not supported!"
  translator.worker.catch(error => {
    // Catch CompileErrors because for those we know what to do.
    if (error.name === 'CompileError')
      $('#unsupported-browser').hidden = false;
    else
      throw error;
  });
}
main();

Двоичные данные
inference/wasm/test_page/logos.png

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 15 KiB

1076
inference/wasm/test_page/package-lock.json сгенерированный

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,14 +0,0 @@
{
"dependencies": {
"@browsermt/bergamot-translator": "file:../module",
"cors": "^2.8.5",
"express": "^4.18.2",
"nocache": "^2.1.0"
},
"config": {
"port": 80
},
"scripts": {
"start": "node ./bergamot-httpserver.js $npm_package_config_port 1 0"
}
}

Просмотреть файл

@ -1,39 +0,0 @@
#!/bin/bash
# Copies the pre-built WASM artifacts from the given folder into the module's
# worker directory and starts the test-page HTTP server on port 80.
usage="Copy wasm artifacts from the given folder and start httpserver
Usage: $(basename "$0") [ARTIFACTS_SOURCE_FOLDER]
where:
ARTIFACTS_SOURCE_FOLDER Directory containing pre-built wasm artifacts"
SCRIPT_ABSOLUTE_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
if [ "$#" -ne 1 ]; then
  echo "Illegal number of parameters passed"
  echo "$usage"
  # Non-zero status so callers can tell the script did not run.
  exit 1
fi
# Check if ARTIFACTS_SOURCE_FOLDER is valid or not
if [ ! -e "$1" ]; then
  echo "Error: Folder \"$1\" doesn't exist"
  exit 1
fi
# Prepare a list all wasm artifacts to be copied and copy them to the destination folder
ARTIFACTS_BASE_NAME="bergamot-translator-worker"
ARTIFACTS=("$1/$ARTIFACTS_BASE_NAME.js" "$1/$ARTIFACTS_BASE_NAME.wasm")
ARTIFACTS_DESTINATION_FOLDER="$SCRIPT_ABSOLUTE_PATH/../module/worker"
for i in "${ARTIFACTS[@]}"; do
  # Stop copying at the first artifact that is missing.
  if [ ! -f "$i" ]; then
    echo "Warning: artifact \"$i\" not found, not copying any further artifacts" >&2
    break
  fi
  cp "$i" "$ARTIFACTS_DESTINATION_FOLDER"
  echo "Copied \"$i\" to \"$ARTIFACTS_DESTINATION_FOLDER\""
done
# Start http server (in a subshell so the caller's working directory is kept).
# Each step only runs if the previous one succeeded.
(cd "$SCRIPT_ABSOLUTE_PATH" &&
  npm install &&
  echo "Start httpserver" &&
  node bergamot-httpserver.js 80 1 0)

1
inference/wasm/tests/.gitignore поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
node_modules/

1309
inference/wasm/tests/package-lock.json сгенерированный Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,17 @@
{
"name": "wasm-tests",
"version": "1.0.0",
"directories": {
"test": "tests"
},
"scripts": {
"test": "vitest --run",
"test:watch": "vitest"
},
"keywords": [],
"license": "MPL-2.0",
"description": "WASM tests for the inference engine.",
"devDependencies": {
"vitest": "^2.1.4"
}
}

Просмотреть файл

@ -0,0 +1,7 @@
import { describe, it, expect } from 'vitest';
// Placeholder suite: its single test only asserts that 1 + 1 equals 2, so a
// passing run shows that the vitest runner is installed and executes this
// file.
describe('Basic Test Suite', () => {
it('should pass a basic test', () => {
expect(1 + 1).toBe(2);
});
});

Просмотреть файл

@ -44,7 +44,7 @@ tasks:
task inference-build
test-local:
description: "Run local-build tests for the inference engine"
description: "Run build-local C++ tests for the inference engine"
dependencies:
build: inference-build-local
run-on-tasks-for: ["github-pull-request"]
@ -53,7 +53,7 @@ tasks:
- bash
- -c
- >-
task inference-test
task inference-test-local
build-wasm:
description: "Build the wasm bindings for the inference engine"
@ -65,4 +65,16 @@ tasks:
- bash
- -c
- >-
task inference-build-wasm
task inference-build-wasm
test-wasm:
description: "Run build-wasm JS tests for the inference engine"
dependencies:
build-wasm: inference-build-wasm
run-on-tasks-for: ["github-pull-request"]
run:
command:
- bash
- -c
- >-
task inference-test-wasm

Просмотреть файл

@ -3,6 +3,7 @@
import argparse
import subprocess
import os
import platform
import sys
@ -43,6 +44,10 @@ def main():
"/builds/worker/checkouts",
]
# Export the host operating system as an environment variable within the container.
host_os = platform.system()
docker_command.extend(["--env", f"HOST_OS={host_os}"])
# Add additional volumes if provided
if args.volume:
for volume in args.volume: