Merge pull request #72 from microsoft/abhiram-embedding-fix
Fix to limit memory usage when using the fastText embedding loaders
This commit is contained in:
Коммит
96b3015096
|
@ -35,8 +35,8 @@ def test_load_pretrained_vectors_glove(tmp_path):
|
|||
|
||||
@pytest.mark.smoke
|
||||
def test_load_pretrained_vectors_fasttext(tmp_path):
|
||||
filename = "wiki.en.bin"
|
||||
filename = "wiki.simple.bin"
|
||||
load_fasttext(tmp_path)
|
||||
filepath = os.path.join(os.path.join(tmp_path, "fastText"), filename)
|
||||
statinfo = os.stat(filepath)
|
||||
assert statinfo.st_size == 8493673445
|
||||
assert statinfo.st_size == 2668450750
|
||||
|
|
|
@ -57,7 +57,7 @@ def test_load_pretrained_vectors_glove():
|
|||
|
||||
def test_load_pretrained_vectors_fasttext():
|
||||
dir_path = "temp_data/"
|
||||
file_path = os.path.join(os.path.join(dir_path, "fastText"), "wiki.en.bin")
|
||||
file_path = os.path.join(os.path.join(dir_path, "fastText"), "wiki.simple.bin")
|
||||
|
||||
assert isinstance(load_fasttext(dir_path), FastText)
|
||||
|
||||
|
|
|
@ -2,5 +2,5 @@
|
|||
# Licensed under the MIT License.
|
||||
|
||||
WORD2VEC_URL = 'https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz'
|
||||
FASTTEXT_EN_URL = 'https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.en.zip'
|
||||
FASTTEXT_EN_URL = 'https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.simple.zip'
|
||||
GLOVE_URL = 'http://nlp.stanford.edu/data/glove.840B.300d.zip'
|
||||
|
|
|
@ -32,10 +32,14 @@ def _extract_fasttext_vectors(zip_path, dest_path="."):
|
|||
return dest_path
|
||||
|
||||
|
||||
def _download_fasttext_vectors(download_dir, file_name="wiki.en.zip"):
|
||||
def _download_fasttext_vectors(download_dir, file_name="wiki.simple.zip"):
|
||||
""" Downloads pre-trained word vectors for English, trained on Wikipedia using
|
||||
fastText. You can directly download the vectors from here:
|
||||
https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.en.zip
|
||||
https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.simple.zip
|
||||
|
||||
For the full version of pre-trained word vectors, change the url for
|
||||
FASTTEXT_EN_URL to https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.en.zip
|
||||
in __init__.py
|
||||
|
||||
Args:
|
||||
download_dir (str): File path to download the file
|
||||
|
@ -75,7 +79,7 @@ def _maybe_download_and_extract(dest_path, file_name):
|
|||
return file_path
|
||||
|
||||
|
||||
def load_pretrained_vectors(dest_path, file_name="wiki.en.bin"):
|
||||
def load_pretrained_vectors(dest_path, file_name="wiki.simple.bin"):
|
||||
""" Method that loads fastText vectors. Downloads if it doesn't exist.
|
||||
|
||||
Args:
|
||||
|
|
Загрузка…
Ссылка в новой задаче