Streamline memory-bundle loads (#307)

Provides an additional constructor that takes care of loading the memory bundle inside this library when a configuration file is supplied by a client such as translateLocally or the Python bindings. Once the config file is read, we have access to the information required to construct the MemoryBundle. - The command-line application supplied here, app/bergamot, is now configured to use the fast-load path. - Changes to binary loading additionally revealed a bug in the example-run script used in the docs and tied to CI; the fix is included. - Shortlist is made optional in the memory bundle, which required changes to getShortlistMemoryFromConfig. Fixes #304. Fixes #306. See also: XapaJIaMnu/translateLocally#82.
2022-01-19 16:36:48 +00:00 · 2022-01-19 16:36:48 +00:00 · 7099b9e9ad
--- a/app/bergamot.cpp
+++ b/app/bergamot.cpp
@ -16,8 +16,7 @@ int main(int argc, char *argv[]) {
  // Construct a model.
  auto options = parseOptionsFromFilePath(config.modelConfigPaths.front());

-  MemoryBundle memoryBundle;
-  std::shared_ptr<TranslationModel> model = service.createCompatibleModel(options, std::move(memoryBundle));
+  std::shared_ptr<TranslationModel> model = service.createCompatibleModel(options);

  ResponseOptions responseOptions;
  std::string input = readFromStdin();
--- a/examples/run-native.sh
+++ b/examples/run-native.sh
@ -9,7 +9,7 @@ wget --quiet --continue --directory models/ \
 # Patch the config-files generated from marian for use in bergamot.
 python3 bergamot-translator-tests/tools/patch-marian-for-bergamot.py \
    --config-path models/ende.student.tiny11/config.intgemm8bitalpha.yml \
-    --ssplit-prefix-file 3rd-party/ssplit-cpp/split-cpp/nonbreaking_prefixes/nonbreaking_prefix.en
+    --ssplit-prefix-file $(realpath 3rd_party/ssplit-cpp/nonbreaking_prefixes/nonbreaking_prefix.en)

 # Patched config file will be available with .bergamot.yml suffix.
 CONFIG=models/ende.student.tiny11/config.intgemm8bitalpha.yml.bergamot.yml
--- a/src/translator/byte_array_util.cpp
+++ b/src/translator/byte_array_util.cpp
@ -101,10 +101,12 @@ AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {

 AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options) {
  auto shortlist = options->get<std::vector<std::string>>("shortlist");
-  ABORT_IF(shortlist.empty(), "No path to shortlist file is given.");
-  ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]),
-           "Loading non-binary shortlist file into memory is not supported");
-  return loadFileToMemory(shortlist[0], 64);
+  if (!shortlist.empty()) {
+    ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]),
+             "Loading non-binary shortlist file into memory is not supported");
+    return loadFileToMemory(shortlist[0], 64);
+  }
+  return AlignedMemory();
 }

 void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
--- a/src/translator/service.h
+++ b/src/translator/service.h
@ -127,10 +127,9 @@ class AsyncService {

  /// Create a TranslationModel compatible with this instance of Service. Internally assigns how many replicas of
  /// backend needed based on worker threads set. See TranslationModel for documentation on other params.
-  template <class ConfigType>
-  Ptr<TranslationModel> createCompatibleModel(const ConfigType &config, MemoryBundle &&memory = MemoryBundle{}) {
+  Ptr<TranslationModel> createCompatibleModel(const TranslationModel::Config &config) {
    // @TODO: Remove this remove this dependency/coupling.
-    return New<TranslationModel>(config, std::move(memory), /*replicas=*/config_.numWorkers);
+    return New<TranslationModel>(config, /*replicas=*/config_.numWorkers);
  }

  /// With the supplied TranslationModel, translate an input. A Response is constructed with optional items set/unset
--- a/src/translator/translation_model.cpp
+++ b/src/translator/translation_model.cpp
@ -27,22 +27,25 @@ TranslationModel::TranslationModel(const Config &options, MemoryBundle &&memory
  ABORT_IF(replicas == 0, "At least one replica needs to be created.");
  backend_.resize(replicas);

-  if (options_->hasAndNotEmpty("shortlist")) {
-    int srcIdx = 0, trgIdx = 1;
-    bool shared_vcb =
-        vocabs_.sources().front() ==
-        vocabs_.target();  // vocabs_->sources().front() is invoked as we currently only support one source vocab
-    if (memory_.shortlist.size() > 0 && memory_.shortlist.begin() != nullptr) {
-      bool check = options_->get<bool>("check-bytearray", false);
-      shortlistGenerator_ = New<data::BinaryShortlistGenerator>(memory_.shortlist.begin(), memory_.shortlist.size(),
-                                                                vocabs_.sources().front(), vocabs_.target(), srcIdx,
-                                                                trgIdx, shared_vcb, check);
-    } else {
-      // Changed to BinaryShortlistGenerator to enable loading binary shortlist file
-      // This class also supports text shortlist file
-      shortlistGenerator_ = New<data::BinaryShortlistGenerator>(options_, vocabs_.sources().front(), vocabs_.target(),
-                                                                srcIdx, trgIdx, shared_vcb);
-    }
+  // Try to load shortlist from memory-bundle. If not available, try to load from options_;
+
+  int srcIdx = 0, trgIdx = 1;
+  // vocabs_->sources().front() is invoked as we currently only support one source vocab
+  bool shared_vcb = (vocabs_.sources().front() == vocabs_.target());
+
+  if (memory_.shortlist.size() > 0 && memory_.shortlist.begin() != nullptr) {
+    bool check = options_->get<bool>("check-bytearray", false);
+    shortlistGenerator_ = New<data::BinaryShortlistGenerator>(memory_.shortlist.begin(), memory_.shortlist.size(),
+                                                              vocabs_.sources().front(), vocabs_.target(), srcIdx,
+                                                              trgIdx, shared_vcb, check);
+  } else if (options_->hasAndNotEmpty("shortlist")) {
+    // Changed to BinaryShortlistGenerator to enable loading binary shortlist file
+    // This class also supports text shortlist file
+    shortlistGenerator_ = New<data::BinaryShortlistGenerator>(options_, vocabs_.sources().front(), vocabs_.target(),
+                                                              srcIdx, trgIdx, shared_vcb);
+  } else {
+    // In this case, the loadpath does not load shortlist.
+    shortlistGenerator_ = nullptr;
  }
 }

--- a/src/translator/translation_model.h
+++ b/src/translator/translation_model.h
@ -6,6 +6,7 @@

 #include "batch.h"
 #include "batching_pool.h"
+#include "byte_array_util.h"
 #include "cache.h"
 #include "common/utils.h"
 #include "data/shortlist.h"
@ -56,7 +57,10 @@ class TranslationModel {
  /// @param [in] options: Marian options object.
  /// @param [in] memory: MemoryBundle object holding memory buffers containing parameters to build MarianBackend,
  /// ShortlistGenerator, Vocabs and SentenceSplitter.
-  TranslationModel(const Config& options, MemoryBundle&& memory = MemoryBundle{}, size_t replicas = 1);
+  TranslationModel(const Config& options, MemoryBundle&& memory, size_t replicas = 1);
+
+  TranslationModel(const Config& options, size_t replicas = 1)
+      : TranslationModel(options, getMemoryBundleFromConfig(options), replicas) {}

  /// Make a Request to be translated by this TranslationModel instance.
  /// @param [in] requestId: Unique identifier associated with this request, available from Service.