Streamline memory-bundle loads (#307)
Provides an additional constructor which takes care of the bundle loading within this source tree when a configuration file is supplied from a client such as translateLocally or the Python bindings. Once the config file is read, we have access to the information required to construct the MemoryBundle. - The command-line application supplied from here, app/bergamot, is now configured to use the fast-load path. - Changes to binary-loading additionally revealed a bug in the example-run script used in docs and tied to CI; the fix is included. - Shortlist is made optional in the memory bundle, making changes to getModelMemoryFromConfig. Fixes #304. Fixes #306. See also: XapaJIaMnu/translateLocally#82.
This commit is contained in:
Родитель
acbc46d816
Коммит
7099b9e9ad
|
@ -16,8 +16,7 @@ int main(int argc, char *argv[]) {
|
|||
// Construct a model.
|
||||
auto options = parseOptionsFromFilePath(config.modelConfigPaths.front());
|
||||
|
||||
MemoryBundle memoryBundle;
|
||||
std::shared_ptr<TranslationModel> model = service.createCompatibleModel(options, std::move(memoryBundle));
|
||||
std::shared_ptr<TranslationModel> model = service.createCompatibleModel(options);
|
||||
|
||||
ResponseOptions responseOptions;
|
||||
std::string input = readFromStdin();
|
||||
|
|
|
@ -9,7 +9,7 @@ wget --quiet --continue --directory models/ \
|
|||
# Patch the config-files generated from marian for use in bergamot.
|
||||
python3 bergamot-translator-tests/tools/patch-marian-for-bergamot.py \
|
||||
--config-path models/ende.student.tiny11/config.intgemm8bitalpha.yml \
|
||||
--ssplit-prefix-file 3rd-party/ssplit-cpp/split-cpp/nonbreaking_prefixes/nonbreaking_prefix.en
|
||||
--ssplit-prefix-file $(realpath 3rd_party/ssplit-cpp/nonbreaking_prefixes/nonbreaking_prefix.en)
|
||||
|
||||
# Patched config file will be available with .bergamot.yml suffix.
|
||||
CONFIG=models/ende.student.tiny11/config.intgemm8bitalpha.yml.bergamot.yml
|
||||
|
|
|
@ -101,11 +101,13 @@ AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {
|
|||
|
||||
AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options) {
|
||||
auto shortlist = options->get<std::vector<std::string>>("shortlist");
|
||||
ABORT_IF(shortlist.empty(), "No path to shortlist file is given.");
|
||||
if (!shortlist.empty()) {
|
||||
ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]),
|
||||
"Loading non-binary shortlist file into memory is not supported");
|
||||
return loadFileToMemory(shortlist[0], 64);
|
||||
}
|
||||
return AlignedMemory();
|
||||
}
|
||||
|
||||
void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
|
||||
std::vector<std::shared_ptr<AlignedMemory>>& vocabMemories) {
|
||||
|
|
|
@ -127,10 +127,9 @@ class AsyncService {
|
|||
|
||||
/// Create a TranslationModel compatible with this instance of Service. Internally assigns how many replicas of
|
||||
/// backend needed based on worker threads set. See TranslationModel for documentation on other params.
|
||||
template <class ConfigType>
|
||||
Ptr<TranslationModel> createCompatibleModel(const ConfigType &config, MemoryBundle &&memory = MemoryBundle{}) {
|
||||
Ptr<TranslationModel> createCompatibleModel(const TranslationModel::Config &config) {
|
||||
// @TODO: Remove this dependency/coupling.
|
||||
return New<TranslationModel>(config, std::move(memory), /*replicas=*/config_.numWorkers);
|
||||
return New<TranslationModel>(config, /*replicas=*/config_.numWorkers);
|
||||
}
|
||||
|
||||
/// With the supplied TranslationModel, translate an input. A Response is constructed with optional items set/unset
|
||||
|
|
|
@ -27,22 +27,25 @@ TranslationModel::TranslationModel(const Config &options, MemoryBundle &&memory
|
|||
ABORT_IF(replicas == 0, "At least one replica needs to be created.");
|
||||
backend_.resize(replicas);
|
||||
|
||||
if (options_->hasAndNotEmpty("shortlist")) {
|
||||
// Try to load shortlist from memory-bundle. If not available, try to load from options_.
|
||||
|
||||
int srcIdx = 0, trgIdx = 1;
|
||||
bool shared_vcb =
|
||||
vocabs_.sources().front() ==
|
||||
vocabs_.target(); // vocabs_->sources().front() is invoked as we currently only support one source vocab
|
||||
// vocabs_->sources().front() is invoked as we currently only support one source vocab
|
||||
bool shared_vcb = (vocabs_.sources().front() == vocabs_.target());
|
||||
|
||||
if (memory_.shortlist.size() > 0 && memory_.shortlist.begin() != nullptr) {
|
||||
bool check = options_->get<bool>("check-bytearray", false);
|
||||
shortlistGenerator_ = New<data::BinaryShortlistGenerator>(memory_.shortlist.begin(), memory_.shortlist.size(),
|
||||
vocabs_.sources().front(), vocabs_.target(), srcIdx,
|
||||
trgIdx, shared_vcb, check);
|
||||
} else {
|
||||
} else if (options_->hasAndNotEmpty("shortlist")) {
|
||||
// Changed to BinaryShortlistGenerator to enable loading binary shortlist file
|
||||
// This class also supports text shortlist file
|
||||
shortlistGenerator_ = New<data::BinaryShortlistGenerator>(options_, vocabs_.sources().front(), vocabs_.target(),
|
||||
srcIdx, trgIdx, shared_vcb);
|
||||
}
|
||||
} else {
|
||||
// In this case, the loadpath does not load shortlist.
|
||||
shortlistGenerator_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "batch.h"
|
||||
#include "batching_pool.h"
|
||||
#include "byte_array_util.h"
|
||||
#include "cache.h"
|
||||
#include "common/utils.h"
|
||||
#include "data/shortlist.h"
|
||||
|
@ -56,7 +57,10 @@ class TranslationModel {
|
|||
/// @param [in] options: Marian options object.
|
||||
/// @param [in] memory: MemoryBundle object holding memory buffers containing parameters to build MarianBackend,
|
||||
/// ShortlistGenerator, Vocabs and SentenceSplitter.
|
||||
TranslationModel(const Config& options, MemoryBundle&& memory = MemoryBundle{}, size_t replicas = 1);
|
||||
TranslationModel(const Config& options, MemoryBundle&& memory, size_t replicas = 1);
|
||||
|
||||
TranslationModel(const Config& options, size_t replicas = 1)
|
||||
: TranslationModel(options, getMemoryBundleFromConfig(options), replicas) {}
|
||||
|
||||
/// Make a Request to be translated by this TranslationModel instance.
|
||||
/// @param [in] requestId: Unique identifier associated with this request, available from Service.
|
||||
|
|
Загрузка…
Ссылка в новой задаче