Add Marian server for model testing (#492)

* Compile marian server

* Add Marian server for testing

* Reformat

* Update utils/marian_client.py

Co-authored-by: Greg Tatum <gregtatum@users.noreply.github.com>

* Make port configurable

* Relock poetry

---------

Co-authored-by: Greg Tatum <gregtatum@users.noreply.github.com>
This commit is contained in:
Evgeny Pavlov 2024-03-28 15:53:16 -07:00 коммит произвёл GitHub
Родитель 7a15b5e97a
Коммит 3774779cb7
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
6 изменённых файлов: 418 добавлений и 291 удалений

Просмотреть файл

@@ -5,6 +5,11 @@ SHELL=/bin/bash
# task group id for downloading evals and logs
LOGS_TASK_GROUP?=
# An ID of a Taskcluster task with a Marian model in the artifacts
MODEL_TASK?=
# A command to run with run-docker
DOCKER_COMMAND=bash
MARIAN_SERVER_PORT=8886
# OpusCleaner is a data cleaner for training corpus
# More details are in docs/cleaning.md
@@ -103,27 +108,26 @@ run-docker:
--rm \
--volume $$(pwd):/builds/worker/checkouts \
--workdir /builds/worker/checkouts \
ftt-local bash
-p $(MARIAN_SERVER_PORT):$(MARIAN_SERVER_PORT) \
ftt-local $(DOCKER_COMMAND)
# Run tests under Docker
run-tests-docker: build-docker
run-tests-docker:
# this is a mitigation to guard against build failures with the new Apple ARM processors
if [ -n "$$VIRTUAL_ENV" ]; then \
echo "Error: Virtual environment detected. Exit the poetry shell."; \
exit 1; \
fi && \
if [ $$(uname -m) == 'arm64' ]; then \
echo "setting arm64 platform"; \
export DOCKER_DEFAULT_PLATFORM=linux/amd64; \
fi && \
docker run \
--interactive \
--tty \
--rm \
--volume $$(pwd):/builds/worker/checkouts \
--workdir /builds/worker/checkouts \
ftt-local make run-tests
run-tests-docker: DOCKER_COMMAND="make run-tests"
run-tests-docker: run-docker
# Run Marian server that loads a model from data/models/$MODEL_TASK
# For example:
# MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make download-model
# MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make run-server-docker
# Then run `python utils/marian_client.py` to test the model
# It will be slow on a CPU under Docker
run-server-docker: DOCKER_COMMAND=/builds/worker/tools/marian-dev/build/marian-server \
-c /builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK)/decoder.yml \
-m /builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK)/model.npz \
-v /builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK)/vocab.spm /builds/worker/checkouts/data/taskcluster-models/$(MODEL_TASK)/vocab.spm \
--port $(MARIAN_SERVER_PORT)
run-server-docker: run-docker
# Validates Taskcluster task graph locally
validate-taskgraph:
@@ -155,13 +159,21 @@ download-logs:
# Downloads evaluation results from Taskcluster task group to a CSV file
# This includes BLEU and chrF metrics for each dataset and trained model
download-evals:
mkdir -p data/taskcluster-logs
mkdir -p data/taskcluster-evals
poetry install --only taskcluster --no-root
poetry run python utils/taskcluster_downloader.py \
--output=data/taskcluster-evals/$(LOGS_TASK_GROUP) \
--mode=evals \
--task-group-id=$(LOGS_TASK_GROUP)
# Downloads a trained model from the Taskcluster task artifacts
# For example: `MODEL_TASK=ZP5V73iKTM2HCFQsCU-JBQ make download-model`
download-model:
mkdir -p data/taskcluster-models/$(MODEL_TASK)
wget -O data/taskcluster-models/$(MODEL_TASK)/decoder.yml https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/$(MODEL_TASK)/runs/0/artifacts/public%2Fbuild%2Fmodel.npz.best-chrf.npz.decoder.yml
wget -O data/taskcluster-models/$(MODEL_TASK)/model.npz https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/$(MODEL_TASK)/runs/0/artifacts/public%2Fbuild%2Fmodel.npz.best-chrf.npz
wget -O data/taskcluster-models/$(MODEL_TASK)/vocab.spm https://firefox-ci-tc.services.mozilla.com/api/queue/v1/task/$(MODEL_TASK)/runs/0/artifacts/public%2Fbuild%2Fvocab.spm
# Runs Tensorboard for Marian training logs in ./logs directory
# then go to http://localhost:6006

Просмотреть файл

@@ -25,6 +25,8 @@ RUN apt-get update -qq \
libhunspell-dev \
bc \
libopenblas-dev \
openssl \
libssl-dev \
&& apt-get clean
RUN mkdir /builds/worker/tools && \

Просмотреть файл

@@ -17,12 +17,14 @@ mkdir -p "${marian_dir}"
cd "${marian_dir}"
if [ "${use_gpu}" == "true" ]; then
# this is a production version that runs on GPU
test -v CUDA_DIR
cmake .. -DUSE_SENTENCEPIECE=on -DUSE_FBGEMM=on -DCOMPILE_CPU=on -DCMAKE_BUILD_TYPE=Release \
-DCUDA_TOOLKIT_ROOT_DIR="${CUDA_DIR}" "${extra_args[@]}"
else
# this is a CPU version that we use for testing
cmake .. -DUSE_SENTENCEPIECE=on -DUSE_FBGEMM=on -DCOMPILE_CPU=on -DCMAKE_BUILD_TYPE=Release \
-DCOMPILE_CUDA=off "${extra_args[@]}"
-DCOMPILE_CUDA=off -DCOMPILE_SERVER=on "${extra_args[@]}"
fi
make -j "${threads}"

598
poetry.lock сгенерированный

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@@ -32,6 +32,7 @@ requests="2.26.0"
humanize = "^4.9.0"
blessed = "^1.20.0"
huggingface-hub = "^0.20.3"
websocket_client ="*"
[tool.poetry.group.tests.dependencies]
sacrebleu="2.0.0"

50
utils/marian_client.py Normal file
Просмотреть файл

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
"""
A client that connects to a Marian server and translates text interactively.

Run `python utils/marian_client.py` and type text to translate in the terminal.
Source: https://github.com/marian-nmt/marian-dev/blob/master/scripts/server/client_example.py
"""

import argparse
import sys

from websocket import create_connection

if __name__ == "__main__":
    # Handle command-line options.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,  # Preserves whitespace in the help text.
    )
    parser.add_argument("-b", "--batch-size", type=int, default=1)
    parser.add_argument("-p", "--port", type=int, default=8886)
    args = parser.parse_args()

    # Open a websocket connection to the locally running Marian server.
    ws = create_connection(f"ws://localhost:{args.port}/translate")
    try:
        count = 0
        batch = ""
        for line in sys.stdin:
            count += 1
            # Python 3 only (the f-string above requires it), so stdin lines are
            # already str — the Python 2 `line.decode("utf-8")` step was dead code.
            batch += line
            if count == args.batch_size:
                # Translate the accumulated batch.
                ws.send(batch)
                result = ws.recv()
                print(result.rstrip())
                count = 0
                batch = ""
        if count:
            # Translate the remaining sentences that did not fill a full batch.
            ws.send(batch)
            result = ws.recv()
            print(result.rstrip())
    finally:
        # Close the connection even if translation fails mid-stream,
        # so the socket is never leaked.
        ws.close()