From 9575a43e3ed1e480d638b5ab52ae39375b602ff2 Mon Sep 17 00:00:00 2001
From: Ayush Shridhar
Date: Sat, 28 Sep 2019 15:21:28 +0530
Subject: [PATCH] Compress similarity model (#986)

Fixes #939
---
 bugbug/similarity.py          | 4 +++-
 scripts/similarity_trainer.py | 6 +++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/bugbug/similarity.py b/bugbug/similarity.py
index 73efe2f0..7ff294bf 100644
--- a/bugbug/similarity.py
+++ b/bugbug/similarity.py
@@ -177,7 +177,9 @@ class BaseSimilarity(abc.ABC):
         return
 
     def save(self):
-        joblib.dump(self, f"{self.__class__.__name__.lower()}.similaritymodel")
+        path = f"{self.__class__.__name__.lower()}.similaritymodel"
+        joblib.dump(self, path)
+        return path
 
     @staticmethod
     def load(model_file_name):
diff --git a/scripts/similarity_trainer.py b/scripts/similarity_trainer.py
index c2d66d03..90fe65c2 100644
--- a/scripts/similarity_trainer.py
+++ b/scripts/similarity_trainer.py
@@ -5,12 +5,14 @@
 # You can obtain one at http://mozilla.org/MPL/2.0/.
 
 import argparse
+import os
 import sys
 from logging import INFO, basicConfig, getLogger
 
 from sklearn.feature_extraction.text import TfidfVectorizer
 
 from bugbug import bugzilla, db, similarity
+from bugbug.utils import zstd_compress
 
 basicConfig(level=INFO)
 logger = getLogger(__name__)
@@ -58,7 +60,9 @@ def main():
         cleanup_urls=args.cleanup_urls, nltk_tokenizer=args.nltk_tokenizer
     )
 
-    model.save()
+    path = model.save()
+    assert os.path.exists(path)
+    zstd_compress(path)
 
 
 if __name__ == "__main__":