Setup WASM test infrastructure for CI (#920)

* Rename task `inference-test` to `inference-test-local` This commit clarifies that these tests are the local C++ unit tests, now that we will be adding WASM JS tests to CI. * Remove unused WASM test-page files This removes the files for building a WASM test page that functions very similarly to the `about:translations` page in Firefox. I kept these files around during the initial clone of the repository, because I wasn't sure yet if I wanted to use anything here for testing in CI, but I think it will be cleaner to just make new CI tests. * Add vitest infrastructure for WASM CI tests https://vitest.dev/ seems like a simple, easy-to-use JS testing famework that is installable and configurable with NPM. This patch only introduces a stubbed test file with a basic assertion, but I plan to use this to test the WASM bindings and outputs more thoroughly in a subsequent PR. * Add task `inference-test-wasm` This commit adds a new task for `inference-test-wasm` that also runs in CI on relevant PRs that touch inference-related sections of the code. * Ensure macOS host dependencies build on 1 thread There is an issue with building WASM using multiple threads on Aarch64 macOS chips. It works on a single thread. This patch exposes the host operating system type to the Docker container so that the `inference-test-wasm` task can determine how many threads to use.
2024-11-08 13:30:22 -06:00 · 2024-11-08 13:30:22 -06:00 · db60f54acd
--- a/Taskfile.yml
+++ b/Taskfile.yml
@ -87,18 +87,31 @@ tasks:
      - >-
          ./inference/scripts/build-local.sh

-  inference-test:
-    desc: Run inference tests.
-    cmds:
-      - >-
-          ./inference/scripts/unit-tests.sh
-
  inference-build-wasm:
    desc: Build inference engine WASM.
    cmds:
      - >-
          ./inference/scripts/build-wasm.py {{.CLI_ARGS}}

+  inference-test-local:
+    desc: Run inference build-local C++ tests.
+    cmds:
+      - >-
+          ./inference/scripts/unit-tests.sh
+
+  inference-test-wasm:
+    desc: Run inference build-wasm JS tests.
+    deps:
+      - task: inference-build-wasm
+        vars:
+          # When the host system is macOS, the WASM build fails when
+          # building with multiple threads in the Docker container.
+          # If the host system is macOS, pass -j 1.
+          CLI_ARGS: '{{if eq (env "HOST_OS") "Darwin"}}-j 1{{end}}'
+    cmds:
+      - >-
+          cd inference/wasm/tests && npm install && npm run test
+
  lint-black:
    desc: Checks the styling of the Python code with Black.
    deps: [poetry-install-black]
--- a/inference/scripts/unit-tests.sh
+++ b/inference/scripts/unit-tests.sh
@ -5,7 +5,7 @@ set -e
 cd "$(dirname $0)/.."

 # Ensure script is running within docker
-./scripts/detect-docker.sh inference-test
+./scripts/detect-docker.sh inference-test-local

 # Check if build-local/src/tests/units directory exists
 if [ ! -d "build-local/src/tests/units" ]; then
--- a/inference/wasm/module/main.js
+++ b/inference/wasm/module/main.js
@ -1,21 +0,0 @@
-import * as readline from 'node:readline/promises';
-import {stdin, stdout} from 'node:process';
-import {BatchTranslator} from "./translator.js";
-
-const rl = readline.createInterface({input: stdin, output: stdout});
-
-const translator = new BatchTranslator();
-
-for await (const line of rl) {
-	const response = await translator.translate({
-		from: "en",
-		to: "es",
-		text: line,
-		html: false,
-		qualityScores: false
-	});
-
-	console.log(response.target.text);
-}
-
-translator.delete();
--- a/inference/wasm/module/package.json
+++ b/inference/wasm/module/package.json
@ -1,39 +0,0 @@
-{
-  "name": "@browsermt/bergamot-translator",
-  "version": "0.4.9",
-  "description": "Cross platform C++ library focusing on optimized machine translation on the consumer-grade device.",
-  "homepage": "https://github.com/browsermt/bergamot-translator#readme",
-  "repository": {
-    "type": "git",
-    "url": "git+ssh://git@github.com/browsermt/bergamot-translator.git"
-  },
-  "keywords": [
-    "machine",
-    "translation"
-  ],
-  "author": "",
-  "license": "MPL-2.0",
-  "bugs": {
-    "url": "https://github.com/browsermt/bergamot-translator/issues"
-  },
-  "type": "module",
-  "main": "translator.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "files": [
-    "worker/bergamot-translator-worker.js",
-    "worker/bergamot-translator-worker.wasm",
-    "worker/translator-worker.js",
-    "translator.js",
-    "main.js"
-  ],
-  "config": {
-    "emscripten_version": "3.1.8"
-  },
-  "scripts": {
-    "prepare": "test -f worker/bergamot-translator-worker.wasm || npm run build",
-    "build": "mkdir -p ../../build-wasm && docker run --rm -v $(realpath ../../):/src -v $(realpath ../../build-wasm):/build -v $(pwd)/worker:/dst -w /build emscripten/emsdk:$npm_package_config_emscripten_version sh -c \"emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=off /src && emmake make -j2 && cp bergamot-translator-worker.wasm bergamot-translator-worker.js /dst\"",
-    "test": "echo \"Hello world!\" | node main.js"
-  }
-}
--- a/inference/wasm/module/translator.js
+++ b/inference/wasm/module/translator.js
@ -1,879 +0,0 @@
-/**
- * @typedef {Object} TranslationRequest
- * @property {String} from
- * @property {String} to
- * @property {String} text
- * @property {Boolean} html
- * @property {Integer?} priority
- */
-
-/**
- * @typedef {Object} TranslationResponse
- * @property {TranslationRequest} request
- * @property {{text: string}} target
- */
-
-/**
- * NodeJS compatibility, a thin WebWorker layer around node:worker_threads.
- */
-if (!(typeof window !== 'undefined' && window.Worker)) {
-    globalThis.Worker = class {
-        #worker;
-
-        constructor(url) {
-            this.#worker = new Promise(async (accept) => {
-                const {Worker} = await import(/* webpackIgnore: true */ 'node:worker_threads');
-                accept(new Worker(url));
-            });
-        }
-
-        addEventListener(eventName, callback) {
-            this.#worker.then(worker => worker.on(eventName, (data) => callback({data})));
-        }
-
-        postMessage(message) {
-            this.#worker.then(worker => worker.postMessage(message));
-        }
-
-        terminate() {
-            this.#worker.then(worker => worker.terminate());
-        }
-    }
-}
-
-/**
- * Thrown when a pending translation is replaced by another newer pending
- * translation.
- */
-export class SupersededError extends Error {}
-
-
-/**
- * Thrown when a translation was removed from the queue.
- */
-export class CancelledError extends Error {}
-
-
-/**
- * Wrapper around bergamot-translator loading and model management.
- */
- export class TranslatorBacking {
-    
-    /**
-     * @param {{
-     *  cacheSize?: number,
-     *  useNativeIntGemm?: boolean,
-     *  downloadTimeout?: number,
-     *  registryUrl?: string
-     *  pivotLanguage?: string?
-     *  onerror?: (err: Error)
-     * }} options
-     */
-    constructor(options) {
-        this.options = options || {};
-
-        this.registryUrl = this.options.registryUrl || 'https://bergamot.s3.amazonaws.com/models/index.json';
-
-        this.downloadTimeout = 'downloadTimeout' in this.options ? parseInt(this.options.downloadTimeout) : 60000;
-
-        /**
-         * registry of all available models and their urls
-         * @type {Promise<Model[]>}
-         */
-        this.registry = this.loadModelRegistery();
-
-        /**
-         * Map of downloaded model data files as buffers per model.
-         * @type {Map<{from:string,to:string}, Promise<Map<string,ArrayBuffer>>>}
-         */
-        this.buffers = new Map();
-
-        /**
-         * @type {string?}
-         */
-        this.pivotLanguage = 'pivotLanguage' in this.options ? options.pivotLanguage : 'en';
-        
-        /**
-         * A map of language-pairs to a list of models you need for it.
-         * @type {Map<{from:string,to:string}, Promise<{from:string,to:string}[]>>}
-         */
-        this.models = new Map();
-
-        /**
-         * Error handler for all errors that are async, not tied to a specific
-         * call and that are unrecoverable.
-         * @type {(error: Error)}
-         */
-        this.onerror = this.options.onerror || (err => console.error('WASM Translation Worker error:', err));
-    }
-
-    /**
-     * Loads a worker thread, and wraps it in a message passing proxy. I.e. it
-     * exposes the entire interface of TranslationWorker here, and all calls
-     * to it are async. Do note that you can only pass arguments that survive
-     * being copied into a message. 
-     * @return {Promise<{worker:Worker, exports:Proxy<TranslationWorker>}>}
-     */
-    async loadWorker() {
-        const worker = new Worker(new URL('./worker/translator-worker.js', import.meta.url));
-
-        /**
-         * Incremental counter to derive request/response ids from.
-         */
-        let serial = 0;
-
-        /**
-         * Map of pending requests
-         * @type {Map<number,{accept:(any), reject:(Error)}>}
-         */
-        const pending = new Map();
-
-        // Function to send requests
-        const call = (name, ...args) => new Promise((accept, reject) => {
-            const id = ++serial;
-            pending.set(id, {
-                accept,
-                reject,
-                callsite: { // for debugging which call caused the error
-                    message: `${name}(${args.map(arg => String(arg)).join(', ')})`,
-                    stack: new Error().stack
-                }
-            });
-            worker.postMessage({id, name, args});
-        });
-
-        // … receive responses
-        worker.addEventListener('message', function({data: {id, result, error}}) {
-            if (!pending.has(id)) {
-                console.debug('Received message with unknown id:', arguments[0]);
-                throw new Error(`BergamotTranslator received response from worker to unknown call '${id}'`);
-            }
-
-            const {accept, reject, callsite} = pending.get(id);
-            pending.delete(id);
-
-            if (error !== undefined)
-                reject(Object.assign(new Error(), error, {
-                    message: error.message + ` (response to ${callsite.message})`,
-                    stack: error.stack ? `${error.stack}\n${callsite.stack}` : callsite.stack
-                }));
-            else
-                accept(result);
-        });
-
-        // … and general errors
-        worker.addEventListener('error', this.onerror.bind(this));
-
-        // Await initialisation. This will also nicely error out if the WASM
-        // runtime fails to load.
-        await call('initialize', this.options);
-
-        /**
-         * Little wrapper around the message passing api of Worker to make it
-         * easy to await a response to a sent message. This wraps the worker in
-         * a Proxy so you can treat it as if it is an instance of the
-         * TranslationWorker class that lives inside the worker. All function
-         * calls to it are transparently passed through the message passing
-         * channel.
-         */
-        return {
-            worker,
-            exports: new Proxy({}, {
-                get(target, name, receiver) {
-                    // Prevent this object from being marked "then-able"
-                    if (name !== 'then')
-                        return (...args) => call(name, ...args);
-                }
-            })
-        };
-    }
-
-    /**
-     * Loads the model registry. Uses the registry shipped with this extension,
-     * but formatted a bit easier to use, and future-proofed to be swapped out
-     * with a TranslateLocally type registry.
-     * @return {Promise<{
-     *   from: string,
-     *   to: string,
-     *   files: {
-     *     [part:string]: {
-     *       name: string,
-     *       size: number,
-     *       expectedSha256Hash: string
-     *     }
-     *   }[]
-     * }>}
-     */
-    async loadModelRegistery() {
-        const response = await fetch(this.registryUrl, {credentials: 'omit'});
-        const registry = await response.json();
-
-        // Add 'from' and 'to' keys for each model.
-        return Array.from(Object.entries(registry), ([key, files]) => {
-            return {
-                from: key.substring(0, 2),
-                to: key.substring(2, 4),
-                files
-            }
-        });
-    }
-
-    /**
-     * Gets or loads translation model data. Caching wrapper around
-     * `loadTranslationModel()`.
-     * @param {{from:string, to:string}}
-     * @return {Promise<{
-     *   model: ArrayBuffer,
-     *   vocab: ArrayBuffer,
-     *   shortlist: ArrayBuffer,
-     *   qualityModel: ArrayBuffer?
-     * }>}
-     */
-    getTranslationModel({from, to}, options) {
-        const key = JSON.stringify({from, to});
-
-        if (!this.buffers.has(key)) {
-            const promise = this.loadTranslationModel({from, to}, options);
-
-            // set the promise so we return the same promise when its still pending
-            this.buffers.set(key, promise);
-
-            // But if loading fails, remove the promise again so we can try again later
-            promise.catch(err => this.buffers.delete(key))
-        }
-
-        return this.buffers.get(key);
-    }
-
-    /**
-     * Downloads a translation model and returns a set of
-     * ArrayBuffers. These can then be passed to a TranslationWorker thread
-     * to instantiate a TranslationModel inside the WASM vm.
-     * @param {{from:string, to:string}}
-     * @param {{signal:AbortSignal?}?}
-     * @return {Promise<{
-     *   model: ArrayBuffer,
-     *   vocab: ArrayBuffer,
-     *   shortlist: ArrayBuffer,
-     *   qualityModel: ArrayBuffer?
-     *   config: string?
-     * }>}
-     */
-    async loadTranslationModel({from, to}, options) {
-        performance.mark(`loadTranslationModule.${JSON.stringify({from, to})}`);
-
-        // Find that model in the registry which will tell us about its files
-        const entries = (await this.registry).filter(model => model.from == from && model.to == to);
-
-        if (!entries)
-            throw new Error(`No model for '${from}' -> '${to}'`);
-
-        const files = entries[0].files;
-
-        const abort = () => reject(new CancelledError('abort signal'));
-
-        // Promise that resolves (or rejects really) when the abort signal hits
-        const escape = new Promise((accept, reject) => {
-            if (options?.signal)
-                options.signal.addEventListener('abort', abort);
-        });
-
-        // Download all files mentioned in the registry entry. Race the promise
-        // of all fetch requests, and a promise that rejects on the abort signal
-        const buffers = Object.fromEntries(await Promise.race([
-            Promise.all(Object.entries(files).map(async ([part, file]) => {
-                // Special case where qualityModel is not part of the model, and this
-                // should also catch the `config` case.
-                if (file === undefined || file.name === undefined)
-                    return [part, null];
-
-                try {
-                    return [part, await this.fetch(file.name, file.expectedSha256Hash, options)];
-                } catch (cause) {
-                    throw new Error(`Could not fetch ${file.name} for ${from}->${to} model`, {cause});
-                }
-            })),
-            escape
-        ]));
-
-        // Nothing to abort now, clean up abort promise
-        if (options?.signal)
-            options.signal.removeEventListener('abort', abort);
-
-        performance.measure('loadTranslationModel', `loadTranslationModule.${JSON.stringify({from, to})}`);
-
-        let vocabs = [];
-
-        if (buffers.vocab)
-            vocabs = [buffers.vocab]
-        else if (buffers.trgvocab && buffers.srcvocab)
-            vocabs = [buffers.srcvocab, buffers.trgvocab]
-        else
-            throw new Error(`Could not identify vocab files for ${from}->${to} model among: ${Array.from(Object.keys(files)).join(' ')}`);
-
-        let config = {};
-
-        // For the Ukrainian models we need to override the gemm-precision
-        if (files.model.name.endsWith('intgemm8.bin'))
-            config['gemm-precision'] = 'int8shiftAll';
-
-        // If quality estimation is used, we need to turn off skip-cost. Turning
-        // this off causes quite the slowdown.
-        if (files.qualityModel)
-            config['skip-cost'] = false;
-
-        // Allow the registry to also specify marian configuration parameters
-        if (files.config)
-            Object.assign(config, files.config);
-
-        // Translate to generic bergamot-translator format that also supports
-        // separate vocabularies for input & output language, and calls 'lex'
-        // a more descriptive 'shortlist'.
-        return {
-            model: buffers.model,
-            shortlist: buffers.lex,
-            vocabs,
-            qualityModel: buffers.qualityModel,
-            config
-        };
-    }
-
-    /**
-     * Helper to download file from the web. Verifies the checksum.
-     * @param {string} url
-     * @param {string?} checksum sha256 checksum as hexadecimal string
-     * @param {{signal:AbortSignal}?} extra fetch options
-     * @returns {Promise<ArrayBuffer>}
-     */
-    async fetch(url, checksum, extra) {
-        // Rig up a timeout cancel signal for our fetch
-        const controller = new AbortController();
-        const abort = () => controller.abort();
-
-        const timeout = this.downloadTimeout ? setTimeout(abort, this.downloadTimeout) : null;
-
-        try {
-            // Also maintain the original abort signal
-            if (extra?.signal)
-                extra.signal.addEventListener('abort', abort);
-
-            const options = {
-                credentials: 'omit',
-                signal: controller.signal,
-            };
-
-            if (checksum)
-                options['integrity'] = `sha256-${this.hexToBase64(checksum)}`;
-
-            // Disable the integrity check for NodeJS because of
-            // https://github.com/nodejs/undici/issues/1594
-            if (typeof window === 'undefined')
-                delete options['integrity'];
-
-            // Start downloading the url, using the hex checksum to ask
-            // `fetch()` to verify the download using subresource integrity 
-            const response = await fetch(url, options);
-
-            // Finish downloading (or crash due to timeout)
-            return await response.arrayBuffer();
-
-        } finally {
-            if (timeout)
-                clearTimeout(timeout);
-
-            if (extra?.signal)
-                extra.signal.removeEventListener('abort', abort);
-        }
-    }
-
-    /**
-     * Converts the hexadecimal hashes from the registry to something we can use with
-     * the fetch() method.
-     */
-    hexToBase64(hexstring) {
-        return btoa(hexstring.match(/\w{2}/g).map(function(a) {
-            return String.fromCharCode(parseInt(a, 16));
-        }).join(""));
-    }
-
-    /**
-     * Crappy named method that gives you a list of models to translate from
-     * one language into the other. Generally this will be the same as you
-     * just put in if there is a direct model, but it could return a list of
-     * two models if you need to pivot through a third language.
-     * Returns just [{from:str,to:str}...]. To be used something like this:
-     * ```
-     * const models = await this.getModels(from, to);
-     * models.forEach(({from, to}) => {
-     *   const buffers = await this.loadTranslationModel({from,to});
-     *   [TranslationWorker].loadTranslationModel({from,to}, buffers)
-     * });
-     * ```
-     * @returns {Promise<TranslationModel[]>}
-     */
-    getModels({from, to}) {
-        const key = JSON.stringify({from, to});
-
-        // Note that the `this.models` map stores Promises. This so that
-        // multiple calls to `getModels` that ask for the same model will
-        // return the same promise, and the actual lookup is only done once.
-        // The lookup is async because we need to await `this.registry`
-        if (!this.models.has(key))
-            this.models.set(key, this.findModels(from, to));
-
-        return this.models.get(key);
-    }
-
-    /**
-     * Find model (or model pair) to translate from `from` to `to`.
-     * @param {string} from
-     * @param {string} to
-     * @returns {Promise<TranslationModel[]>}
-     */
-    async findModels(from, to) {
-        const registry = await this.registry;
-
-        let direct = [], outbound = [], inbound = [];
-
-        registry.forEach(model => {
-            if (model.from === from && model.to === to)
-                direct.push(model);
-            else if (model.from === from && model.to === this.pivotLanguage)
-                outbound.push(model);
-            else if (model.to === to && model.from === this.pivotLanguage)
-                inbound.push(model);
-        });
-
-        if (direct.length)
-            return [direct[0]];
-
-        if (outbound.length && inbound.length)
-            return [outbound[0], inbound[0]];
-
-        throw new Error(`No model available to translate from '${from}' to '${to}'`);
-    }
-}
-
-/**
- * Translator balancing between throughput and latency. Can use multiple worker
- * threads.
- */
-export class BatchTranslator {
-    /**
-     * @param {{
-     *  cacheSize?: number,
-     *  useNativeIntGemm?: boolean,
-     *  workers?: number,
-     *  batchSize?: number,
-     *  downloadTimeout?: number,
-     *  workerUrl?: string,
-     *  registryUrl?: string
-     *  pivotLanguage?: string?
-     * }} options
-     */
-    constructor(options, backing) {
-        if (!backing)
-            backing = new TranslatorBacking(options);
-
-        this.backing = backing;
-
-        /**
-         * @type {Array<{idle:Boolean, worker:Proxy}>} List of active workers
-         * (and a flag to mark them idle or not)
-         */
-        this.workers = [];
-
-        /**
-         * Maximum number of workers
-         * @type {number} 
-         */
-        this.workerLimit = Math.max(options?.workers || 0, 1);
-
-        /**
-         * List of batches we push() to & shift() from using `enqueue`.
-         * @type {{
-         *    id: number,
-         *    key: string,
-         *    priority: number,
-         *    models: TranslationModel[],
-         *    requests: Array<{
-         *      request: TranslationRequest,
-         *      resolve: (response: TranslationResponse),
-         *      reject: (error: Error)
-         *    }>
-         * }}
-         */
-        this.queue = [];
-
-        /**
-         * batch serial to help keep track of batches when debugging
-         * @type {Number}
-         */
-        this.batchSerial = 0;
-
-        /**
-         * Number of requests in a batch before it is ready to be translated in
-         * a single call. Bigger is better for throughput (better matrix packing)
-         * but worse for latency since you'll have to wait for the entire batch
-         * to be translated.
-         * @type {Number}
-         */
-        this.batchSize = Math.max(options?.batchSize || 8, 1);
-
-        this.onerror = options?.onerror || (err => console.error('WASM Translation Worker error:', err));
-    }
-    
-    /**
-     * Destructor that stops and cleans up.
-     */
-    async delete() {
-        // Empty the queue
-        this.remove(() => true);
-
-        // Terminate the workers
-        this.workers.forEach(({worker}) => worker.terminate());
-    }
-
-    /**
-     * Makes sure queued work gets send to a worker. Will delay it till `idle`
-     * to make sure the batches have been filled to some degree. Will keep
-     * calling itself as long as there is work in the queue, but it does not
-     * hurt to call it multiple times. This function always returns immediately.
-     */
-    notify() {
-        setTimeout(async () => {
-            // Is there work to be done?
-            if (!this.queue.length)
-                return;
-
-            // Find an idle worker
-            let worker = this.workers.find(worker => worker.idle);
-
-            // No worker free, but space for more?
-            if (!worker && this.workers.length < this.workerLimit) {
-                try {
-                    // Claim a place in the workers array (but mark it busy so
-                    // it doesn't get used by any other `notify()` calls).
-                    const placeholder = {idle: false};
-                    this.workers.push(placeholder);
-
-                    // adds `worker` and `exports` props
-                    Object.assign(placeholder, await this.backing.loadWorker());
-
-                    // At this point we know our new worker will be usable.
-                    worker = placeholder;
-                } catch (e) {
-                    this.onerror(new Error(`Could not initialise translation worker: ${e.message}`));
-                }
-            }
-
-            // If no worker, that's the end of it.
-            if (!worker)
-                return;
-
-            // Up to this point, this function has not used await, so no
-            // chance that another call stole our batch since we did the check
-            // at the beginning of this function and JavaScript is only
-            // cooperatively parallel.
-            const batch = this.queue.shift();
-
-            // Put this worker to work, marking as busy
-            worker.idle = false;
-            try {
-                await this.consumeBatch(batch, worker.exports);
-            } catch (e) {
-                batch.requests.forEach(({reject}) => reject(e));
-            }
-            worker.idle = true;
-
-            // Is there more work to be done? Do another idleRequest
-            if (this.queue.length)
-                this.notify();
-        });
-    }
-
-    /**
-     * The only real public call you need!
-     * ```
-     * const {target: {text:string}} = await this.translate({
-     *   from: 'de',
-     *   to: 'en',
-     *   text: 'Hallo Welt!',
-     *   html: false, // optional
-     *   priority: 0 // optional, like `nice` lower numbers are translated first
-     * })
-     * ```
-     * @param {TranslationRequest} request
-     * @returns {Promise<TranslationResponse>}
-     */
-    translate(request) {
-        const {from, to, priority} = request;
-
-        return new Promise(async (resolve, reject) => {
-            try {
-                // Batching key: only requests with the same key can be batched
-                // together. Think same translation model, same options.
-                const key = JSON.stringify({from, to});
-
-                // (Fetching models first because if we would do it between looking
-                // for a batch and making a new one, we end up with a race condition.)
-                const models = await this.backing.getModels(request);
-                
-                // Put the request and its callbacks into a fitting batch
-                this.enqueue({key, models, request, resolve, reject, priority});
-
-                // Tell a worker to pick up the work at some point.
-                this.notify();
-            } catch (e) {
-                reject(e);
-            }
-        });
-    }
-
-    /**
-     * Prune pending requests by testing each one of them to whether they're
-     * still relevant. Used to prune translation requests from tabs that got
-     * closed.
-     * @param {(request:TranslationRequest) => boolean} filter evaluates to true if request should be removed
-     */
-    remove(filter) {
-        const queue = this.queue;
-
-        this.queue = [];
-
-        queue.forEach(batch => {
-            batch.requests.forEach(({request, resolve, reject}) => {
-                if (filter(request)) {
-                    // Add error.request property to match response.request for
-                    // a resolve() callback. Pretty useful if you don't want to
-                    // do all kinds of Funcion.bind() dances.
-                    reject(Object.assign(new CancelledError('removed by filter'), {request}));
-                    return;
-                }
-
-                this.enqueue({
-                    key: batch.key,
-                    priority: batch.priority,
-                    models: batch.models,
-                    request,
-                    resolve,
-                    reject
-                });
-            });
-        });
-    }
-
-    /**
-     * Internal function used to put a request in a batch that still has space.
-     * Also responsible for keeping the batches in order of priority. Called by
-     * `translate()` but also used when filtering pending requests.
-     * @param {{request:TranslateRequest, models:TranslationModel[], key:String, priority:Number?, resolve:(TranslateResponse)=>any, reject:(Error)=>any}}
-     */
-    enqueue({key, models, request, resolve, reject, priority}) {
-        if (priority === undefined)
-            priority = 0;
-         // Find a batch in the queue that we can add to
-         // (TODO: can we search backwards? that would speed things up)
-        let batch = this.queue.find(batch => {
-            return batch.key === key
-                && batch.priority === priority
-                && batch.requests.length < this.batchSize
-        });
-
-        // No batch or full batch? Queue up a new one
-        if (!batch) {
-            batch = {id: ++this.batchSerial, key, priority, models, requests: []};
-            this.queue.push(batch);
-            this.queue.sort((a, b) => a.priority - b.priority);
-        }
-
-        batch.requests.push({request, resolve, reject});
-    }
-
-    /**
-     * Internal method that uses a worker thread to process a batch. You can
-     * wait for the batch to be done by awaiting this call. You should only
-     * then reuse the worker otherwise you'll just clog up its message queue.
-     */
-    async consumeBatch(batch, worker) {
-        performance.mark('BergamotBatchTranslator.start');
-
-        // Make sure the worker has all necessary models loaded. If not, tell it
-        // first to load them.
-        await Promise.all(batch.models.map(async ({from, to}) => {
-            if (!await worker.hasTranslationModel({from, to})) {
-                const buffers = await this.backing.getTranslationModel({from, to});
-                await worker.loadTranslationModel({from, to}, buffers);
-            }
-        }));
-
-        // Call the worker to translate. Only sending the actually necessary
-        // parts of the batch to avoid trying to send things that don't survive
-        // the message passing API between this thread and the worker thread.
-        const responses = await worker.translate({
-            models: batch.models.map(({from, to}) => ({from, to})),
-            texts: batch.requests.map(({request: {text, html, qualityScores}}) => ({
-                text: text.toString(),
-                html: !!html,
-                qualityScores: !!qualityScores
-            }))
-        });
-
-        // Responses are in! Connect them back to their requests and call their
-        // callbacks.
-        batch.requests.forEach(({request, resolve, reject}, i) => {
-            // TODO: look at response.ok and reject() if it is false
-            resolve({
-                request, // Include request for easy reference? Will allow you
-                         // to specify custom properties and use that to link
-                         // request & response back to each other.
-                ...responses[i] // {target: {text: String}}
-            });
-        });
-        
-        performance.measure('BergamotBatchTranslator', 'BergamotBatchTranslator.start');
-    }
-}
-
-
-/**
- * Translator optimised for interactive use.
- */
-export class LatencyOptimisedTranslator {
-    /**
-     * @type {TranslatorBacking}
-     */
-    backing;
-
-    /**
-     * @type {Promise<{idle:boolean, worker:Worker, exports:Proxy<TranslationWorker>}>}
-     */
-    worker;
-
-    /**
-     * @type {{request: TranslationRequest, accept:(TranslationResponse), reject:(Error)} | null}
-     */
-    pending;
-
-    /**
-     * @param {{
-     *  cacheSize?: number,
-     *  useNativeIntGemm?: boolean,
-     *  downloadTimeout?: number,
-     *  workerUrl?: string,
-     *  registryUrl?: string
-     *  pivotLanguage?: string?
-     * }} options
-     */
-    constructor(options, backing) {
-        if (!backing)
-            backing = new TranslatorBacking(options);
-
-        this.backing = backing;
-
-        // Exposing the this.loadWorker() returned promise through this.worker
-        // so that you can use that to catch any errors that happened during
-        // loading.
-        this.worker = this.backing.loadWorker().then(worker => ({...worker, idle:true}));
-    }
-
-    /**
-     * Destructor that stops and cleans up.
-     */
-    async delete() {
-        // Cancel pending translation
-        if (this.pending) {
-            this.pending.reject(new CancelledError('translator got deleted'));
-            this.pending = null;
-        }
-
-        // Terminate the worker (I don't care if this fails)
-        try {
-            const {worker} = await this.worker;
-            worker.terminate();
-        } finally {
-            this.worker = null;
-        }
-    }
-    
-    /**
-     * Sets `request` as the next translation to process. If there was already
-     * a translation waiting to be processed, their promise is rejected with a
-     * SupersededError.
-     * @param {TranslationRequest} request
-     * @return {Promise<TranslationResponse>}
-     */
-    translate(request, options) {
-        if (this.pending)
-            this.pending.reject(new SupersededError());
-        
-        return new Promise((accept, reject) => {
-            const pending = {request, accept, reject, options};
-
-            if (options?.signal) {
-                options.signal.addEventListener('abort', e => {
-                    reject(new CancelledError('abort signal'));
-                    if (this.pending === pending)
-                        this.pending = null;
-                });
-            }
-
-            this.pending = pending;
-            this.notify();
-        });
-    }
-    
-    notify() {
-        setTimeout(async () => {
-            if (!this.pending)
-                return;
-
-            // Catch errors such as the worker not working
-            try {
-                // Possibly wait for the worker to finish loading. After it loaded
-                // these calls are pretty much instantaneous.
-                const worker = await this.worker;
-
-                // Is another notify() call hogging the worker? Then stop.
-                if (!worker.idle)
-                    return;
-
-                // Claim the pending translation request.
-                const {request, accept, reject, options} = this.pending;
-                this.pending = null;
-
-                // Mark the worker as occupied
-                worker.idle = false;
-                    
-                try {
-                    const models = await this.backing.getModels(request)
-
-                    await Promise.all(models.map(async ({from, to}) => {
-                        if (!await worker.exports.hasTranslationModel({from, to})) {
-                            const buffers = await this.backing.getTranslationModel({from, to}, {signal: options?.signal});
-                            await worker.exports.loadTranslationModel({from, to}, buffers);
-                        }
-                    }));
-
-                    const {text, html, qualityScores} = request;
-                    const responses = await worker.exports.translate({
-                        models: models.map(({from,to}) => ({from, to})),
-                        texts: [{text, html, qualityScores}]
-                    });
-
-                    accept({request, ...responses[0]});
-                } catch (e) {
-                    reject(e);
-                }
-
-                worker.idle = true;
-
-                // Is there more work to be done? Do another idleRequest
-                if (this.pending)
-                    this.notify();
-            } catch (e) {
-                this.backing.onerror(e);
-            }
-        });
-    }
-}
--- a/inference/wasm/module/worker/package.json
+++ b/inference/wasm/module/worker/package.json
@ -1,3 +0,0 @@
-{
-	"type": "commonjs"
-}
--- a/inference/wasm/module/worker/translator-worker.js
+++ b/inference/wasm/module/worker/translator-worker.js
@ -1,475 +0,0 @@
-/**
- * Wrapper around the dirty bits of Bergamot's WASM bindings.
- */
-
-// Global because importScripts is global.
-var Module = {};
-
-/**
- * node.js compatibility: Fake GlobalWorkerScope that emulates being inside a
- * WebWorker
- */
-if (typeof self === 'undefined') {
-    global.Module = Module;
-
-    global.self = new class GlobalWorkerScope {
-        /** @type {import("node:worker_threads").MessagePort} */
-        #port;
-
-        constructor() {
-            const {parentPort} = require(/* webpackIgnore: true */ 'node:worker_threads');
-            this.#port = parentPort;
-        }
-
-        /**
-         * Add event listener to listen for messages posted to the worker.
-         * @param {string} eventName
-         * @param {(object)} callback
-         */
-        addEventListener(eventName, callback) {
-            this.#port.on(eventName, (data) => callback({data}));
-        }
-
-        /**
-         * Post message outside, to the owner of the Worker.
-         * @param {any} message
-         */
-        postMessage(message) {
-            this.#port.postMessage(message);
-        }
-
-        /**
-         * @param {...string} scripts - Paths to scripts to import in that order
-         */
-        importScripts(...scripts) {
-            const {readFileSync} = require(/* webpackIgnore: true */ 'node:fs');
-            const {join} = require(/* webpackIgnore: true */ 'node:path');
-            for (let pathname of scripts) {
-                const script = readFileSync(join(__dirname, pathname), {encoding: 'utf-8'});
-                eval.call(global, script);
-            }
-        }
-
-        /**
-         * Adds support for local file urls. Assumes anything that doesn't start
-         * with "http" to be a local path.
-         * @param {string} url - path or url
-         * @param {object?} options - See `fetch()` options
-         * @return {Promise<Response>}
-         */
-        async fetch(url, options) {
-            if (url.protocol === 'file:') {
-                const {readFile} = require(/* webpackIgnore: true */ 'node:fs/promises');
-                const buffer = await readFile(url.pathname);
-                const blob = new Blob([buffer]);
-                return new Response(blob, {
-                    status: 200,
-                    statusText: 'OK',
-                    headers: {
-                        'Content-Type': 'application/wasm',
-                        'Content-Length': blob.size.toString()
-                    }
-                });
-            }
-
-            return await fetch(url, options);
-        }
-
-        get location() {
-            return new URL(`file://${__filename}`);
-        }
-    }
-}
-
-class YAML {
-    /**
-     * Parses YAML into dictionary. Does not interpret types, all values are a
-     * string or a list of strings. No support for objects other than the top
-     * level.
-     * @param {string} yaml
-     * @return {{[string]: string | string[]}}
-     */
-    static parse(yaml) {
-        const out = {};
-
-        yaml.split('\n').reduce((key, line, i) => {
-            let match;
-            if (match = line.match(/^\s*-\s+(.+?)$/)) {
-                if (!Array.isArray(out[key]))
-                    out[key] = out[key].trim() ? [out[key]] : [];
-                out[key].push(match[1].trim());
-            }
-            else if (match = line.match(/^\s*([A-Za-z0-9_][A-Za-z0-9_-]*):\s*(.*)$/)) {
-                key = match[1];
-                out[key] = match[2].trim();
-            }
-            else if (!line.trim()) {
-                // whitespace, ignore
-            }
-            else {
-                throw Error(`Could not parse line ${i+1}: "${line}"`);
-            }
-            return key;
-        }, null);
-
-        return out;
-    }
-
-    /**
-     * Turns an object into a YAML string. No support for objects, only simple
-     * types and lists of simple types.
-     * @param {{[string]: string | number | boolean | string[]}} data
-     * @return {string}
-     */
-    static stringify(data) {
-        return Object.entries(data).reduce((str, [key, value]) => {
-            let valstr = '';
-            if (Array.isArray(value))
-                valstr = value.map(val => `\n  - ${val}`).join('');
-            else if (typeof value === 'number' || typeof value === 'boolean' || value.match(/^\d*(\.\d+)?$/))
-                valstr = `${value}`;
-            else
-                valstr = `${value}`; // Quote?
-
-            return `${str}${key}: ${valstr}\n`;
-        }, '');
-    }
-}
-
-/**
- * Wrapper around the bergamot-translator exported module that hides the need
- * of working with C++ style data structures and does model management.
- */
-class BergamotTranslatorWorker {
-    /**
-     * Map of expected symbol -> name of fallback symbol for functions that can
-     * be swizzled for a faster implementation. Firefox Nightly makes use of
-     * this.
-     */
-    static GEMM_TO_FALLBACK_FUNCTIONS_MAP = {
-        'int8_prepare_a': 'int8PrepareAFallback',
-        'int8_prepare_b': 'int8PrepareBFallback',
-        'int8_prepare_b_from_transposed': 'int8PrepareBFromTransposedFallback',
-        'int8_prepare_b_from_quantized_transposed': 'int8PrepareBFromQuantizedTransposedFallback',
-        'int8_prepare_bias': 'int8PrepareBiasFallback',
-        'int8_multiply_and_add_bias': 'int8MultiplyAndAddBiasFallback',
-        'int8_select_columns_of_b': 'int8SelectColumnsOfBFallback'
-    };
-
-    /**
-     * Name of module exported by Firefox Nightly that exports an optimised
-     * implementation of the symbols mentioned above.
-     */
-    static NATIVE_INT_GEMM = 'mozIntGemm';
-
-    /**
-     * Empty because we can't do async constructors yet. It is the
-     * responsibility of whoever owns this WebWorker to call `initialize()`.
-     */
-    constructor(options) {}
-
-    /**
-     * Instantiates a new translation worker with optional options object.
-     * If this call succeeds, the WASM runtime is loaded and ready.
-     * 
-     * Available options are:
-     *   useNativeIntGemm: {true | false} defaults to false. If true, it will
-     *                     attempt to link to the intgemm module available in
-     *                     Firefox Nightly which makes translations much faster.
-     *          cacheSize: {Number} defaults to 0 which disables translation
-     *                     cache entirely. Note that this is a theoretical
-     *                     upper bound. In practice it will use about 1/3th of
-     *                     the cache specified here. 2^14 is not a bad starting
-     *                     value.
-     * @param {{useNativeIntGemm: boolean, cacheSize: number}} options
-     */
-    async initialize(options) {
-        this.options = options || {};
-        this.models = new Map(); // Map<str,Promise<TranslationModel>>
-        this.module = await this.loadModule();
-        this.service = await this.loadTranslationService();
-    }
-
-    /**
-     * Tries to load native IntGEMM module for bergamot-translator. If that
-     * fails because it or any of the expected functions is not available, it
-     * falls back to using the naive implementations that come with the wasm
-     * binary itself through `linkFallbackIntGemm()`.
-     * @param {{env: {memory: WebAssembly.Memory}}} info
-     * @return {{[method:string]: (...any) => any}}
-     */
-    linkNativeIntGemm(info) {
-        if (!WebAssembly['mozIntGemm']) {
-            console.warn('Native gemm requested but not available, falling back to embedded gemm');
-            return this.linkFallbackIntGemm(info);
-        }
-
-        const instance = new WebAssembly.Instance(WebAssembly['mozIntGemm'](), {
-            '': {memory: info['env']['memory']}
-        });
-
-        if (!Array.from(Object.keys(BergamotTranslatorWorker.GEMM_TO_FALLBACK_FUNCTIONS_MAP)).every(fun => instance.exports[fun])) {
-            console.warn('Native gemm is missing expected functions, falling back to embedded gemm');
-            return this.linkFallbackIntGemm(info);
-        }
-
-        return instance.exports;
-    }
-
-    /**
-     * Links intgemm functions that are already available in the wasm binary,
-     * but just exports them under the name that is expected by
-     * bergamot-translator.
-     * @param {{env: {memory: WebAssembly.Memory}}} info
-     * @return {{[method:string]: (...any) => any}}
-     */
-    linkFallbackIntGemm(info) {
-        const mapping = Object.entries(BergamotTranslatorWorker.GEMM_TO_FALLBACK_FUNCTIONS_MAP).map(([key, name]) => {
-            return [key, (...args) => Module['asm'][name](...args)]
-        });
-
-        return Object.fromEntries(mapping);
-    }
-
-    /**
-     * Internal method. Reads and instantiates the WASM binary. Returns a
-     * promise for the exported Module object that contains all the classes
-     * and functions exported by bergamot-translator.
-     * @return {Promise<BergamotTranslator>}
-     */
-    loadModule() {
-        return new Promise(async (resolve, reject) => {
-            try {
-                const response = await self.fetch(new URL('./bergamot-translator-worker.wasm', self.location));
-
-                Object.assign(Module, {
-                    instantiateWasm: (info, accept) => {
-                        try {
-                            WebAssembly.instantiateStreaming(response, {
-                                ...info,
-                                'wasm_gemm': this.options.useNativeIntGemm
-                                    ? this.linkNativeIntGemm(info)
-                                    : this.linkFallbackIntGemm(info)
-                            }).then(({instance}) => accept(instance)).catch(reject);
-                        } catch (err) {
-                            reject(err);
-                        }
-                        return {};
-                    },
-                    onRuntimeInitialized: () => {
-                        resolve(Module);
-                    }
-                });
-
-                // Emscripten glue code. Webpack et al. should not mangle the `Module` property name!
-                self.Module = Module;
-                self.importScripts('bergamot-translator-worker.js');
-            } catch (err) {
-                reject(err);
-            }
-        });
-    }
-
-    /**
-     * Internal method. Instantiates a BlockingService()
-     * @return {BergamotTranslator.BlockingService}
-     */
-    loadTranslationService() {
-        return new this.module.BlockingService({
-            cacheSize: Math.max(this.options.cacheSize || 0, 0)
-        });
-    }
-
-    /**
-     * Returns whether a model has already been loaded in this worker. Marked
-     * async because the message passing interface we use expects async methods.
-     * @param {{from:string, to:string}}
-     * @return boolean
-     */ 
-    hasTranslationModel({from,to}) {
-        const key = JSON.stringify({from,to});
-        return this.models.has(key);
-    }
-
-    /**
-     * Loads a translation model from a set of file buffers. After this, the
-     * model is available to translate with and `hasTranslationModel()` will
-     * return true for this pair.
-     * @param {{from:string, to:string}}
-     * @param {{
-     *   model: ArrayBuffer,
-     *   shortlist: ArrayBuffer,
-     *   vocabs: ArrayBuffer[],
-     *   qualityModel: ArrayBuffer?,
-     *   config?: {
-     *     [key:string]: string
-     *   }
-     * }} buffers
-     */ 
-    loadTranslationModel({from, to}, buffers) {
-        // This because service_bindings.cpp:prepareVocabsSmartMemories :(
-        const uniqueVocabs = buffers.vocabs.filter((vocab, index, vocabs) => {
-            return !vocabs.slice(0, index).includes(vocab);
-        });
-
-        const [modelMemory, shortlistMemory, qualityModel, ...vocabMemory] = [
-            this.prepareAlignedMemoryFromBuffer(buffers.model, 256),
-            this.prepareAlignedMemoryFromBuffer(buffers.shortlist, 64),
-            buffers.qualityModel // optional quality model
-                ? this.prepareAlignedMemoryFromBuffer(buffers.qualityModel, 64)
-                : null,
-            ...uniqueVocabs.map(vocab => this.prepareAlignedMemoryFromBuffer(vocab, 64))
-        ];
-
-        const vocabs = new this.module.AlignedMemoryList();
-        vocabMemory.forEach(vocab => vocabs.push_back(vocab));
-
-        // Defaults
-        let modelConfig = YAML.parse(`
-            beam-size: 1
-            normalize: 1.0
-            word-penalty: 0
-            cpu-threads: 0
-            gemm-precision: int8shiftAlphaAll
-            skip-cost: true
-        `);
-
-        if (buffers.config)
-            Object.assign(modelConfig, buffers.config);
-
-        // WASM marian is only compiled with support for shiftedAll.
-        if (modelConfig['gemm-precision'] === 'int8')
-            modelConfig['gemm-precision'] = 'int8shiftAll';
-
-        // Override these
-        Object.assign(modelConfig, YAML.parse(`
-            alignment: soft
-            quiet: true
-            quiet-translation: true
-            max-length-break: 128
-            mini-batch-words: 1024
-            workspace: 128
-            max-length-factor: 2.0
-        `));
-
-        const key = JSON.stringify({from,to});
-        this.models.set(key, new this.module.TranslationModel(YAML.stringify(modelConfig), modelMemory, shortlistMemory, vocabs, qualityModel));
-    }
-
-    /**
-     * Frees up memory used by old translation model. Does nothing if model is
-     * already deleted.
-     * @param {{from:string, to:string}}
-     */
-    freeTranslationModel({from, to}) {
-        const key = JSON.stringify({from,to});
-        
-        if (!this.models.has(key))
-            return;
-        
-        const model = this.models.get(key);
-        this.models.delete(key);
-
-        model.delete();
-    }
-
-    /**
-     * Internal function. Copies the data from an ArrayBuffer into memory that
-     * can be used inside the WASM vm by Marian.
-     * @param {{ArrayBuffer}} buffer
-     * @param {number} alignmentSize
-     * @return {BergamotTranslator.AlignedMemory}
-     */
-    prepareAlignedMemoryFromBuffer(buffer, alignmentSize) {
-        const bytes = new Int8Array(buffer);
-        const memory = new this.module.AlignedMemory(bytes.byteLength, alignmentSize);
-        memory.getByteArrayView().set(bytes);
-        return memory;
-    }
-
-    /**
-     * Public. Does actual translation work. You have to make sure that the
-     * models necessary for translating text are already loaded before calling
-     * this method. Returns a promise with translation responses.
-     * @param {{models: {from:string, to:string}[], texts: {text: string, html: boolean}[]}}
-     * @return {Promise<{target: {text: string}}[]>}
-     */
-    translate({models, texts}) {
-        // Convert texts array into a std::vector<std::string>.
-        let input = new this.module.VectorString();
-        texts.forEach(({text}) => input.push_back(text));
-
-        // Extracts the texts[].html options into ResponseOption objects
-        let options = new this.module.VectorResponseOptions();
-        texts.forEach(({html, qualityScores}) => options.push_back({alignment: false, html, qualityScores}));
-
-        // Turn our model names into a list of TranslationModel pointers
-        const translationModels = models.map(({from,to}) => {
-            const key = JSON.stringify({from,to});
-            return this.models.get(key);
-        });
-
-        // translate the input, which is a vector<String>; the result is a vector<Response>
-        const responses = models.length > 1
-            ? this.service.translateViaPivoting(...translationModels, input, options)
-            : this.service.translate(...translationModels, input, options);
-        
-        input.delete();
-        options.delete();
-
-        // Convert the Response WASM wrappers into native JavaScript types we
-        // can send over the 'wire' (message passing) in the same format as we
-        // use in bergamot-translator.
-        const translations = texts.map((_, i) => ({
-            target: {
-                text: responses.get(i).getTranslatedText()
-            }
-        }));
-
-        responses.delete();
-
-        return translations;
-    }
-}
-
-/**
- * Because you can't put an Error object in a message. But you can post a
- * generic object!
- * @param {Error} error
- * @return {{
- *  name: string?,
- *  message: string?,
- *  stack: string?
- * }}
- */
-function cloneError(error) {
-    return {
-        name: error.name,
-        message: error.message,
-        stack: error.stack
-    };
-}
-
-// (Constructor doesn't really do anything, we need to call `initialize()`
-// first before using it. That happens from outside the worker.)
-const worker = new BergamotTranslatorWorker();
-
-self.addEventListener('message', async function({data: {id, name, args}}) {
-    if (!id)
-        console.error('Received message without id', arguments[0]);
-
-    try {
-        if (typeof worker[name] !== 'function')
-            throw TypeError(`worker[${name}] is not a function`);
-
-        // Using `Promise.resolve` to await any promises that worker[name]
-        // possibly returns.
-        const result = await Promise.resolve(Reflect.apply(worker[name], worker, args));
-        self.postMessage({id, result});
-    } catch (error) {
-        self.postMessage({
-            id,
-            error: cloneError(error)
-        })
-    }
-});
--- a/inference/wasm/node-test.js
+++ b/inference/wasm/node-test.js
@ -1,175 +0,0 @@
-#!/usr/bin/env node
-
-/**
- * A note upfront: the bergamot-translator API is pretty low level, and
- * embedding it successfully requires some knowledge about the WebWorkers and
- * WebAssembly APIs. This script tries to demonstrate the bergamot-translator
- * API with as little of that boiler plate code as possible.
- * See the wasm/test_page code for a fully fleshed out demo in a web context.
- */
-
-// For node we use the fs module to read local files. In a web context you can
-// use `fetch()` for everything.
-const fs = require('fs');
-
-// Read wasm binary into a blob, which will be loaded by
-// bergamot-translator-worker.js in a minute. In a web context, you'd be using
-// `fetch(...).then(response => response.blob())` for this, but Node does not
-// implement `fetch("file://...")` yet.
-const wasmBinary = fs.readFileSync('./bergamot-translator-worker.wasm');
-
-// Read wasm runtime code that bridges the bergmot-translator binary with JS.
-const wasmRuntime = fs.readFileSync('./bergamot-translator-worker.js', {encoding: 'utf8'});
-
-// Initialise the `Module` object. By adding methods and options to this, we can
-// affect how bergamot-translator interacts with JavaScript. See 
-// https://emscripten.org/docs/api_reference/module.html for all available
-// options. It is important that this object is initialised in the same scope
-// but before `bergamot-translation-worker.js` is executed. Once that script
-// executes, it defines the exported methods as properties of this Module
-// object.
-global.Module = {
-  wasmBinary,
-  onRuntimeInitialized
-};
-
-// Execute bergamot-translation-worker.js in this scope. This will also,
-// indirectly, call the onRuntimeInitialized function defined below and
-// referenced in the `Module` object above.
-eval.call(global, wasmRuntime);
-
-/**
- * Called from inside the bergamot-translation-worker.js script once the wasm
- * module is initialized. At this point that `Module` object that was
- * initialised above will have all the classes defined in the
- * bergamot-translator API available on it.
- */
-async function onRuntimeInitialized() {
-  // Root url for our models for now.
-  const root = 'https://storage.googleapis.com/bergamot-models-sandbox/0.3.1';
-
-  // Urls of data files necessary to create a translation model for
-  // English -> German. Note: list is in order of TranslationModel's arguments.
-  // The `alignment` value is used later on to load each part of the model with
-  // the correct alignment.
-  const files = [
-    // Neural network and weights:
-    {url: `${root}/ende/model.ende.intgemm.alphas.bin`, alignment: 256},
-    
-    // Lexical shortlist which is mainly a speed improvement method, not
-    // strictly necessary:
-    {url: `${root}/ende/lex.50.50.ende.s2t.bin`, alignment: 64},
-    
-    // Vocabulary, maps the input and output nodes of the neural network to
-    // strings. Note: "deen" may look the wrong way around but vocab is the same
-    // between de->en and en->de models.
-    {url: `${root}/ende/vocab.deen.spm`, alignment: 64},
-  ];
-
-  // Download model data and load it into aligned memory. AlignedMemory is a
-  // necessary wrapper around allocated memory inside the WASM environment.
-  // The value of `alignment` is specific for which part of the model we're
-  // loading. See https://en.wikipedia.org/wiki/Data_structure_alignment for a
-  // more general explanation.
-  const [modelMem, shortlistMem, vocabMem] = await Promise.all(files.map(async (file) => {
-    const response = await fetch(file.url);
-    const blob = await response.blob();
-    const buffer = await blob.arrayBuffer();
-    const bytes = new Int8Array(buffer);
-    const memory = new Module.AlignedMemory(bytes.byteLength, file.alignment);
-    memory.getByteArrayView().set(bytes);
-    return memory;
-  }));
-
-  // Set up translation service. This service translates a batch of text per
-  // call. The larger the batch, the faster the translation (in words per
-  // second) happens, but the longer you have to wait for all of them to finish.
-  // The constructor expects an object with options, but only one option is
-  // currently supported: `cacheSize`. Setting this to `0` disables the
-  // translation cache.
-  // **Note**: cacheSize is the theoretical maximum number of sentences that
-  // will be cached. In practise, about 1/3 of that will actually be used.
-  // See https://github.com/XapaJIaMnu/translateLocally/pull/75
-  const service = new Module.BlockingService({cacheSize: 0});
-
-  // Put vocab into its own std::vector<AlignedMemory>. Most models for the
-  // Bergamot project only have one vocabulary that is shared by both the input
-  // and output side of the translator. But in theory, you could have one for
-  // the input side and a different one for the output side. Hence: a list.
-  const vocabs = new Module.AlignedMemoryList();
-  vocabs.push_back(vocabMem);
-
-  // Config yaml (split as array to allow for indentation without adding tabs
-  // or spaces to the strings themselves.)
-  // See https://marian-nmt.github.io/docs/cmd/marian-decoder/ for the meaning
-  // of most of these options and what other options might be available.
-  const config = [
-    'beam-size: 1',
-    'normalize: 1.0',
-    'word-penalty: 0',
-    'alignment: soft', // is necessary if you want to use HTML at any point
-    'max-length-break: 128',
-    'mini-batch-words: 1024',
-    'workspace: 128',
-    'max-length-factor: 2.0',
-    'skip-cost: true',
-    'gemm-precision: int8shiftAll', // is necessary for speed and compatibility with Mozilla's models.
-  ].join('\n');
-
-  // Setup up model with config yaml and AlignedMemory objects. Optionally a
-  // quality estimation model can also be loaded but this is not demonstrated
-  // here. Generally you don't need it, and many models don't include the data
-  // file necessary to use it anyway.
-  const model = new Module.TranslationModel(config, modelMem, shortlistMem, vocabs, /*qualityModel=*/ null);
-
-  // Construct std::vector<std::string> inputs; This is our batch!
-  const input = new Module.VectorString();
-  input.push_back('<p>Hello world! Let us write a second sentence.</p> &amp; <p>Goodbye World!</p>');
-  input.push_back('This is a second example without HTML & entities.');
-
-  // Construct std::vector<ResponseOptions>, one entry per input. Note that
-  // all these three properties of your ResponseOptions object need to be
-  // specified for each entry.
-  // `qualityScores`: related to quality models not explained here. Set this
-  //   to `false`.
-  // `alignment`: computes alignment scores that maps parts of the input text
-  //   to parts of the output text. There is currently no way to get these
-  //   mappings out through the JavaScript API so I suggest you set this to
-  //   `false` as well.
-  // `html`: is the input HTML? If so, the HTML will be parsed and the markup
-  //   will be copied back into the translated output. Note: HTML has to be
-  //   valid HTML5, with proper closing tags and everything since the HTML
-  //   parser built into bergamot-translator does no error correction. Output
-  //   of e.g. `Element.innerHTML` meets this criteria.
-  const options = new Module.VectorResponseOptions();
-  options.push_back({qualityScores: false, alignment: false, html: true});
-  options.push_back({qualityScores: false, alignment: false, html: false});
-
-  // Size of `input` and `options` has to match.
-  console.assert(input.size() === options.size());
-
-  // Translate our batch of 2 requests. Output will be another vector of type 
-  // `std::vector<Response>`.
-  const output = service.translate(model, input, options);
-
-  console.assert(false);
-
-  // Number of outputs is number of inputs.
-  console.assert(input.size() === output.size());
-
-  for (let i = 0; i < output.size(); ++i) {
-    // Get output from std::vector<Response>.
-    const translation = output.get(i).getTranslatedText();
-
-    // Print raw translation for inspection.
-    console.log(translation)
-  }
-
-  // Clean-up: unlike the objects in JavaScript, the objects in the WASM
-  // environment are not automatically cleaned up when they're no longer
-  // referenced. That is why we manually have to call `delete()` on them
-  // when we're done with them.
-  input.delete();
-  options.delete();
-  output.delete();
-}
--- a/inference/wasm/test_page/bergamot-httpserver.js
+++ b/inference/wasm/test_page/bergamot-httpserver.js
@ -1,93 +0,0 @@
-const http = require('http');
-const https = require('https')
-const express = require('express');
-const app = express();
-const server = http.createServer(app);
-const fs = require('fs');
-const url = require('url');
-const nocache = require('nocache');
-const cors = require('cors');
-const path = require('path');
-
-let port = 8000;
-if (process.argv[2]) {
-    port = process.argv[2];
-}
-
-let skipssl = 0;
-if (process.argv[3]) {
-    skipssl = process.argv[3];
-}
-
-let certpath = "/etc/letsencrypt";
-if (process.argv[4]) {
-    certpath = process.argv[4];
-}
-
-app.use(cors())
-app.use(nocache());
-
-app.get('/', cors(), function(req, res) {
-    if (!req.secure && skipssl != 1) {
-        return res.redirect("https://" + req.headers.host + req.url);
-    }
-    res.sendFile(path.join(__dirname + '/index.html'));
-    res.header('Cross-Origin-Embedder-Policy','require-corp');
-    res.header('Cross-Origin-Opener-Policy','same-origin');
-    res.header('Cross-Origin-Resource-Policy','same-origin');
-});
-
-app.get('/*.*' , cors(), function(req, res) {
-    var options = url.parse(req.url, true);
-    var mime = Helper.getMime(options);
-    serveFile(res, options.pathname, mime);
-});
-
-function serveFile(res, pathName, mime) {
-    mime = mime || 'text/html';
-    fs.readFile(__dirname + '/' + pathName, function (err, data) {
-        if (err) {
-            res.writeHead(500, {"Content-Type": "text/plain"});
-            return res.end('Error loading ' + pathName + " with Error: " + err);
-        }
-        res.header('Cross-Origin-Embedder-Policy','require-corp');
-        res.header('Cross-Origin-Opener-Policy','same-origin');
-        res.header('Cross-Origin-Resource-Policy','same-origin');
-        res.writeHead(200, {"Content-Type": mime});
-        res.end(data);
-    });
-}
-
-if (skipssl != 1){
-    https.createServer({
-            key: fs.readFileSync(`${certpath}/privkey.pem`),
-            cert: fs.readFileSync(`${certpath}/cert.pem`),
-            ca: fs.readFileSync(`${certpath}/chain.pem`),
-        },
-        app
-    ).listen(443, () => {
-        console.log('Listening https port 443')
-    })
-}
-
-const Helper = {
-    types: {
-       "wasm" : "application/wasm"
-       , "js" : "application/javascript"
-       , "html" : "text/html"
-       , "htm" : "text/html"
-       , "ico" : "image/vnd.microsoft.icon"
-       , "css" : "text/css"
-    },
-    getMime: function(u) {
-        var ext = this.getExt(u.pathname).replace('.', '');
-        return this.types[ext.toLowerCase()] || 'application/octet-stream';
-    },
-    getExt: function(path) {
-        var i = path.lastIndexOf('.');
-        return (i < 0) ? '' : path.substr(i);
-    }
-};
-
-server.listen(port);
-console.log(`HTTP and BinaryJS server started on port ${port}`);
--- a/inference/wasm/test_page/css/index.css
+++ b/inference/wasm/test_page/css/index.css
@ -1,168 +0,0 @@
-* {
-  box-sizing: border-box;
-}
-
-html,
-body {
-  height: 100%;
-  margin: 0;
-  font-size: 18px;
-  font-family: Optima, Helvetica, Arial;
-}
-
-body {
-  padding: 1rem;
-}
-
-[hidden] {
-  display: none;
-}
-
-.app {
-  padding: 1rem;
-  display: grid;
-  grid: "from swap to" auto "credits credits credits" min-content / 1fr auto 1fr;
-  grid-gap: 1rem;
-  overflow: hidden;
-  min-height: 100%;
-  max-width: 1024px;
-  margin: 0 auto;
-}
-
-.swap::before {
-  display: inline-block;
-  content: '↔️';
-}
-
-@media screen and (max-width: 640px) {
-  .app {
-    grid: "from from" auto "swap swap" auto "to to" auto "credits credits" auto / 1fr;
-  }
-
-  .swap::before {
-    content: '↕️';
-  }
-}
-
-.panel {
-  display: grid;
-  grid-template-rows: auto 1fr;
-  grid-gap: 1rem;
-  max-height: 100%;
-  overflow: hidden;
-}
-
-label {
-  padding: 0 0.5em;
-  display: flex;
-  align-items: center;
-}
-
-.lang-select {
-  padding: 0.25rem 0.5rem;
-  margin-left: 1rem;
-  background: #f4f4f4;
-  font-size: 0.9rem;
-  border: 1px solid #ccc;
-  border-radius: 0.25rem;
-  cursor: pointer;
-}
-
-.panel--from {
-  grid-area: from;
-}
-
-.panel--to {
-  grid-area: to;
-}
-
-.swap {
-  align-self: center;
-  grid-area: swap;
-  font-size: 1.1rem;
-}
-
-.credits {
-  grid-area: credits;
-}
-
-.credits img {
-  float: left;
-  margin: 1em 0;
-}
-
-textarea, [contenteditable], .output-area {
-  padding: 1rem;
-  font-family: sans-serif;
-  font-size: 1rem;
-  resize: none;
-  border-radius: 2px;
-  border: 1px solid #ccc;
-  min-height: 100px;
-  max-height: 100%;
-  overflow: auto;
-}
-
-button {
-  cursor: pointer;
-  border: 1px solid #88c;
-  border-radius: 4px;
-  background: #eef;
-  padding: 0;
-  padding: 0.25rem 0.5rem;
-}
-button:hover {
-  background: #cce;
-}
-
-#output {
-  background-color: #f4f4f4;
-  position: relative;
-}
-
-.output-area [x-bergamot-word-score].bad {
-  background-image:
-    linear-gradient(45deg, transparent 65%, red 80%, transparent 90%),
-    linear-gradient(135deg, transparent 5%, red 15%, transparent 25%),
-    linear-gradient(135deg, transparent 45%, red 55%, transparent 65%),
-    linear-gradient(45deg, transparent 25%, red 35%, transparent 50%);
-  background-repeat:repeat-x;
-  background-size: 8px 2px;
-  background-position:0 95%;
-}
-
-.output-area [x-bergamot-sentence-score].bad {
-  background: rgba(255, 128, 128, 0.8);
-}
-
-.output-area [x-bergamot-sentence-index].highlight-sentence {
-  background: rgba(255, 255, 128, 0.8);
-}
-
-.app.translating #output::after {
-  position: absolute;
-  bottom: 4px;
-  right: 4px;
-  content: 'Translating…';
-}
-
-/* Loading indicator takes priority, so below the .translating selector */
-.app.loading #output::after {
-  position: absolute;
-  bottom: 4px;
-  right: 4px;
-  content: 'Loading translation model…';
-}
-
-.app {
-  position: relative;
-}
-
-#unsupported-browser {
-  position: absolute;
-  top: 0;
-  left: 0;
-  width: 100%;
-  height: 100%;
-  background: white;
-}
--- a/inference/wasm/test_page/index.html
+++ b/inference/wasm/test_page/index.html
@ -1,41 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <title>Bergamot Translations</title>
-    <link rel="stylesheet" href="css/index.css" />
-    <meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
-    <meta
-      name="viewport"
-      content="width=device-width, initial-scale=1.0, viewport-fit=cover"
-    />
-  </head>
-  <body>
-    <div class="app">
-      <div class="panel panel--from">
-        <label>
-          From
-          <select id="lang-from" name="from" class="lang-select"></select>
-        </label>
-        <div id="input" contenteditable="true"></div>
-      </div>
-      <button class="swap" title="swap"></button>
-      <div class="panel panel--to">
-        <label>
-          To
-          <select id="lang-to" name="to" class="lang-select"></select>
-        </label>
-        <div id="output" class="output-area"></div>
-      </div>
-      <div id="unsupported-browser" hidden>
-        <p>Your CPU or browser is not able to run Bergamot translator.</p>
-        <p>Try using Firefox or a Chromium based browser with <a href="https://webassembly.org/roadmap/">Fixed-width SIMD support</a>.</p>
-        <p>If you already are, you might be using a CPU that does not have support for SSE4.1 instructions.</p>
-      </div>
-      <footer class="credits">
-        <img src="logos.png" alt="Logos of the OPUS project, the Bergamot project and the European Union.">
-        <p>This project has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No 825303.</p>
-      </footer>
-    </div>
-    <script type="module" src="js/index.js"></script>
-  </body>
-</html>
--- a/inference/wasm/test_page/js/index.js
+++ b/inference/wasm/test_page/js/index.js
@ -1,215 +0,0 @@
-import {LatencyOptimisedTranslator, TranslatorBacking, CancelledError, SupersededError} from '../node_modules/@browsermt/bergamot-translator/translator.js';
-
-function $(selector) {
-  return document.querySelector(selector);
-}
-
-function $$(selector) {
-  return document.querySelectorAll(selector);
-}
-
-function encodeHTML(text) {
-  const div = document.createElement('div');
-  div.appendChild(document.createTextNode(text));
-  return div.innerHTML;
-}
-
-function addQualityIndicators() {
-  $$('#output [x-bergamot-sentence-score]').forEach(el => {
-    // The threshold is ln(0.5) (https://github.com/browsermt/bergamot-translator/pull/370#issuecomment-1058123399)
-    el.classList.toggle('bad', parseFloat(el.getAttribute('x-bergamot-sentence-score')) < Math.log(0.5));
-  });
-
-  $$('#output [x-bergamot-word-score]').forEach(el => {
-    // The threshold is ln(0.5) (https://github.com/browsermt/bergamot-translator/pull/370#issuecomment-1058123399)
-    el.classList.toggle('bad', parseFloat(el.getAttribute('x-bergamot-word-score')) < Math.log(0.5));
-  });
-
-  // Add tooltips to each (sub)word with sentence and word score.
-  $$('#output [x-bergamot-sentence-score] > [x-bergamot-word-score]').forEach(el => {
-    const sentenceScore = parseFloat(el.parentNode.getAttribute('x-bergamot-sentence-score'));
-    const wordScore = parseFloat(el.getAttribute('x-bergamot-word-score'));
-    el.title = `Sentence: ${Math.exp(sentenceScore).toFixed(2)}  Word: ${Math.exp(wordScore).toFixed(2)}`;
-  });
-}
-
-function highlightSentence(element) {
-  const sentence = element.parentNode.hasAttribute('x-bergamot-sentence-index')
-    ? element.parentNode.getAttribute('x-bergamot-sentence-index')
-    : null;
-  $$('#output font[x-bergamot-sentence-index]').forEach(el => {
-    el.classList.toggle('highlight-sentence', el.getAttribute('x-bergamot-sentence-index') === sentence);
-  })
-}
-
-/**
- * Very minimal WISYWIG editor. Just keyboard shortcuts for the IYKYK crowd.
- */
-class Editor {
-  constructor(root) {
-    this.isApple = window.navigator.platform.startsWith('Mac');
-
-    this.root = root;
-    this.root.addEventListener('keydown', this.onkeydown.bind(this));
-
-    this.mapping = {
-      "b": "bold",
-      "i": "italic",
-      "u": "underline",
-    };
-  }
-
-  onkeydown(event) {
-    if (!(this.isApple ? event.metaKey : event.ctrlKey))
-      return;
-
-    if (!(event.key in this.mapping))
-      return;
-
-    document.execCommand(this.mapping[event.key], false, null);
-
-    event.preventDefault();
-  }
-}
-
-async function main() {
-  const options = {
-    cacheSize: 2^13,
-    downloadTimeout: null // Disable timeout
-  };
-  
-  const backing = new TranslatorBacking(options);
-
-  let pending = 0; // Number of pending requests
-
-  // Patch the fetch() function to track number of pending requests
-  backing.fetch = async function(...args) {
-    try {
-      $('.app').classList.toggle('loading', ++pending > 0);
-      return await TranslatorBacking.prototype.fetch.call(backing, ...args);
-    } finally {
-      $('.app').classList.toggle('loading', --pending > 0);
-    }
-  };
-
-  // Wait for the language model registry to load. Once it is loaded, use
-  // it to fill the "from" and "to" language selection dropdowns.
-  await backing.registry.then(models => {
-    const names = new Intl.DisplayNames(['en'], {type: 'language'});
-
-    ['from', 'to'].forEach(field => {
-      const languages = new Set(models.map(model => model[field]));
-      const select = $(`#lang-${field}`);
-
-      const pairs = Array.from(languages, code => ({code, name: names.of(code)}));
-      
-      pairs.sort(({name: a}, {name: b}) => a.localeCompare(b));
-
-      pairs.forEach(({name, code}) => {
-        select.add(new Option(name, code));
-      })
-    });
-
-    $('#lang-from').value = 'en';
-    $('#lang-to').value = 'es';
-  });
-
-  // Intentionally do this after querying backing.registry to make sure that
-  // that request is fired off first. Now we can start thinking about loading
-  // the WASM binary etc.
-  const translator = new LatencyOptimisedTranslator(options, backing);
-
-  let abortController = new AbortController();
-
-  const translate = async () => {
-    try {
-      const from = $('#lang-from').value;
-      const to = $('#lang-to').value;
-      
-      // Querying models to see whether quality estimation is supported by all
-      // of them.
-      const models = await backing.getModels({from, to});
-      const qualityScores = models.every(model => 'qualityModel' in model.files);
-
-      $('.app').classList.add('translating');
-
-      const response = await translator.translate({
-        from,
-        to,
-        text: $('#input').innerHTML,
-        html: true,
-        qualityScores
-      }, {signal: abortController.signal});
-
-      $('#output').innerHTML = response.target.text;
-      $('#output').classList.toggle('has-quality-scores', qualityScores);
-
-      if (qualityScores)
-        addQualityIndicators();
-
-    } catch (error) {
-      // Ignore errors caused by changing the language pair (which triggers abort())
-      if (error.constructor === CancelledError) {
-        return;
-      }
-      
-      // Ignore 'errors' caused by typing too fast or by changing the language
-      // pair while a translation was still in progress (or being loaded)
-      if (error.constructor === SupersededError || error.constructor === CancelledError)
-        return;
-
-      // Ignore errors caused by selecting a bad pair (e.g. en -> en)
-      if (error.message.startsWith('No model available to translate from'))
-        return;
-
-      alert(`Error during translation: ${error}\n\n${error.stack}`);
-    } finally {
-      const worker = await Promise.race([translator.worker, Promise.resolve(null)]);
-      $('.app').classList.toggle('translating', worker === null || !worker.idle);
-    }
-  }
-
-  const reset = async () => {
-    // Cancel any pending loading/translation
-    abortController.abort();
-
-    // Reset abort controller to a fresh un-aborted one
-    abortController = new AbortController();
-
-    // Clear output to make it more clear something is happening
-    $('#output').innerHTML = '';
-
-    // Immediately start loading the new selection
-    translate();
-  }
-
-  $('button.swap').addEventListener('click', () => {
-    const tmp = $('#lang-from').value;
-    $('#lang-from').value = $('#lang-to').value;
-    $('#lang-to').value = tmp;
-    translate();
-  })
-
-  // Simple WYSIWYG controls
-  const editor = new Editor($('#input'));
-
-  // Translate on any change
-  $('#input').addEventListener('input', translate);
-  $('#lang-from').addEventListener('input', reset);
-  $('#lang-to').addEventListener('input', reset);
-
-  // Hook up sentence boundary highlighting if that information is available.
-  $('#output').addEventListener('mouseover', (e) => highlightSentence(e.target))
-
-  // Wait for bergamot-translator to load. This could throw a CompileError
-  // which we want to catch so we can show "oh noes browser not supported!"
-  translator.worker.catch(error => {
-    // Catch CompileErrors because for those we know what to do.
-    if (error.name === 'CompileError')
-      $('#unsupported-browser').hidden = false;
-    else
-      throw error;
-  });
-}
-
-main();
--- a/inference/wasm/test_page/logos.png
+++ b/inference/wasm/test_page/logos.png
--- a/inference/wasm/test_page/package-lock.json
+++ b/inference/wasm/test_page/package-lock.json
--- a/inference/wasm/test_page/package.json
+++ b/inference/wasm/test_page/package.json
@ -1,14 +0,0 @@
-{
-  "dependencies": {
-    "@browsermt/bergamot-translator": "file:../module",
-    "cors": "^2.8.5",
-    "express": "^4.18.2",
-    "nocache": "^2.1.0"
-  },
-  "config": {
-    "port": 80
-  },
-  "scripts": {
-    "start": "node ./bergamot-httpserver.js $npm_package_config_port 1 0"
-  }
-}
--- a/inference/wasm/test_page/start_server.sh
+++ b/inference/wasm/test_page/start_server.sh
@ -1,39 +0,0 @@
-#!/bin/bash
-
-usage="Copy wasm artifacts from the given folder and start httpserver
-
-Usage: $(basename "$0") [ARTIFACTS_SOURCE_FOLDER]
-
-    where:
-    ARTIFACTS_SOURCE_FOLDER    Directory containing pre-built wasm artifacts"
-
-SCRIPT_ABSOLUTE_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
-
-if [ "$#" -ne 1 ]; then
-    echo "Illegal number of parameters passed"
-    echo "$usage"
-    exit
-fi
-
-# Check if ARTIFACTS_SOURCE_FOLDER is valid or not
-if [ ! -e "$1" ]; then
-    echo "Error: Folder \""$1"\" doesn't exist"
-    exit
-fi
-
-# Prepare a list all wasm artifacts to be copied and copy them to the destination folder
-ARTIFACTS_BASE_NAME="bergamot-translator-worker"
-ARTIFACTS="$1/$ARTIFACTS_BASE_NAME.js $1/$ARTIFACTS_BASE_NAME.wasm"
-ARTIFACTS_DESTINATION_FOLDER=$SCRIPT_ABSOLUTE_PATH/../module/worker
-
-for i in $ARTIFACTS; do
-    [ -f "$i" ] || breaks
-    cp $i $ARTIFACTS_DESTINATION_FOLDER
-    echo "Copied \"$i\" to \"$ARTIFACTS_DESTINATION_FOLDER\""
-done
-
-# Start http server
-(cd $SCRIPT_ABSOLUTE_PATH;
-npm install;
-echo "Start httpserver";
-node bergamot-httpserver.js 80 1 0)
--- a/inference/wasm/tests/.gitignore
+++ b/inference/wasm/tests/.gitignore
@ -0,0 +1 @@
+node_modules/
--- a/inference/wasm/tests/package-lock.json
+++ b/inference/wasm/tests/package-lock.json
--- a/inference/wasm/tests/package.json
+++ b/inference/wasm/tests/package.json
@ -0,0 +1,17 @@
+{
+  "name": "wasm-tests",
+  "version": "1.0.0",
+  "directories": {
+    "test": "tests"
+  },
+  "scripts": {
+    "test": "vitest --run",
+    "test:watch": "vitest"
+  },
+  "keywords": [],
+  "license": "MPL-2.0",
+  "description": "WASM tests for the inference engine.",
+  "devDependencies": {
+    "vitest": "^2.1.4"
+  }
+}
--- a/inference/wasm/tests/stub.test.js
+++ b/inference/wasm/tests/stub.test.js
@ -0,0 +1,7 @@
+import { describe, it, expect } from 'vitest';
+
+describe('Basic Test Suite', () => {
+  it('should pass a basic test', () => {
+    expect(1 + 1).toBe(2);
+  });
+});
--- a/taskcluster/kinds/inference/kind.yml
+++ b/taskcluster/kinds/inference/kind.yml
@ -44,7 +44,7 @@ tasks:
            task inference-build

  test-local:
-    description: "Run local-build tests for the inference engine"
+    description: "Run build-local C++ tests for the inference engine"
    dependencies:
      build: inference-build-local
    run-on-tasks-for: ["github-pull-request"]
@ -53,7 +53,7 @@ tasks:
        - bash
        - -c
        - >-
-            task inference-test
+            task inference-test-local

  build-wasm:
    description: "Build the wasm bindings for the inference engine"
@ -65,4 +65,16 @@ tasks:
        - bash
        - -c
        - >-
-            task inference-build-wasm
+            task inference-build-wasm
+
+  test-wasm:
+    description: "Run build-wasm JS tests for the inference engine"
+    dependencies:
+      build-wasm: inference-build-wasm
+    run-on-tasks-for: ["github-pull-request"]
+    run:
+      command:
+        - bash
+        - -c
+        - >-
+            task inference-test-wasm
--- a/utils/tasks/docker-run.py
+++ b/utils/tasks/docker-run.py
@ -3,6 +3,7 @@
 import argparse
 import subprocess
 import os
+import platform
 import sys


@ -43,6 +44,10 @@ def main():
        "/builds/worker/checkouts",
    ]

+    # Export the host operating system as an environment variable within the container.
+    host_os = platform.system()
+    docker_command.extend(["--env", f"HOST_OS={host_os}"])
+
    # Add additional volumes if provided
    if args.volume:
        for volume in args.volume: