Bug 1861516 - Remove Translations fastText third-party files r=sylvestre,gregtatum

Removes all third-party files related to fastText since it is
no longer used in this code base.

Differential Revision: https://phabricator.services.mozilla.com/D192659
This commit is contained in:
Erik Nordin 2023-11-07 01:34:02 +00:00
Родитель 0409e6b9c7
Коммит c90f8889ed
8 изменённых файлов: 0 добавлений и 617 удалений

Просмотреть файл

@ -1424,8 +1424,6 @@ toolkit/components/passwordmgr/PasswordRulesParser.sys.mjs
toolkit/components/protobuf/
toolkit/components/translation/cld2/
toolkit/components/translations/bergamot-translator
toolkit/components/translations/fasttext/fasttext.js
toolkit/components/translations/fasttext/fasttext_wasm.js
toolkit/components/url-classifier/chromium/
toolkit/components/utils/mozjexl.js
toolkit/components/viaduct/fetch_msg_types.pb.cc

Просмотреть файл

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2016-present, Facebook, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Просмотреть файл

@ -1,536 +0,0 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// Holds the loaded WebAssembly module once `_initFastTextModule` succeeds.
let fastTextModule;

/**
 * Loads the fastText WebAssembly module and stores it in `fastTextModule`.
 *
 * A load failure is only reported through `console.error`; it is not
 * rethrown, and `fastTextModule` keeps whatever value it had before.
 *
 * @param {*} wasmModule
 *     value forwarded unchanged to `loadFastTextModule`.
 *
 * @return {Promise<boolean>} always resolves to `true`, whether or not the
 *     load succeeded.
 */
const _initFastTextModule = async (wasmModule) => {
  try {
    const loadedModule = await loadFastTextModule(wasmModule);
    fastTextModule = loadedModule;
  } catch (loadError) {
    console.error(loadError);
  }
  return true;
};
// Callback to invoke after module initialisation; `null` until registered.
let postRunFunc = null;

/**
 * Registers the function to call once module initialisation has finished.
 * Only one callback is kept: a later call replaces the earlier one.
 *
 * @param {function} func
 *     callback invoked with no arguments.
 */
const addOnPostRun = (func) => {
  postRunFunc = func;
};
/**
 * Starts initialising the fastText module and, once that settles, calls the
 * callback registered through `addOnPostRun` (if there is one).
 *
 * The initialisation promise is not returned, so callers cannot await it;
 * completion is signalled only through the registered callback.
 *
 * @param {*} wasmModule
 *     value forwarded unchanged to `_initFastTextModule`.
 */
const loadFastText = (wasmModule) => {
  const firePostRunHook = () => {
    if (!postRunFunc) {
      return;
    }
    postRunFunc();
  };
  _initFastTextModule(wasmModule).then(firePostRunHook);
};
// Top-level `this`, captured so the fetch-based loaders in this file can
// prefer a `fetch` attached to it and fall back to the global `fetch`.
const thisModule = this;
// File names used when writing training data, test data and model binaries
// through the WebAssembly module's `FS` object before handing the path to
// the native fastText object.
const trainFileInWasmFs = 'train.txt';
const testFileInWasmFs = 'test.txt';
const modelFileInWasmFs = 'model.bin';
/**
 * Allocates room for `len` 32-bit floats through the module's `_malloc` and
 * describes the allocation.
 *
 * NOTE(review): nothing in this file releases the allocation again; confirm
 * whether the caller or the native side is expected to free it.
 *
 * @param {number} len
 *     number of float elements to reserve.
 *
 * @return {{ptr: number, size: number, buffer: ArrayBuffer}}
 *     `ptr` is the byte offset of the allocation inside `buffer`, `size` is
 *     `len`, and `buffer` is the buffer backing `fastTextModule.HEAPU8`.
 */
const getFloat32ArrayFromHeap = (len) => {
  const byteLength = Float32Array.BYTES_PER_ELEMENT * len;
  const offset = fastTextModule._malloc(byteLength);
  // Building the byte view validates that the allocation lies inside the heap.
  const heapView = new Uint8Array(
    fastTextModule.HEAPU8.buffer,
    offset,
    byteLength
  );
  const { byteOffset: ptr, buffer } = heapView;
  return { ptr, size: len, buffer };
};
/**
 * Builds a `Float32Array` view over the region described by a
 * `{ptr, size, buffer}` record: `size` floats starting at byte offset `ptr`
 * of `buffer`. The data is viewed in place, not copied.
 */
const heapToFloat32 = ({ buffer, ptr, size }) =>
  new Float32Array(buffer, ptr, size);
/**
 * Entry point for loading or training fastText models through the
 * WebAssembly module.
 */
class FastText {
  /**
   * @constructor
   *
   * @param {object} fastTextModule
   *     module object whose `FastText` constructor is instantiated to create
   *     the native bridge object. This parameter is only visible inside the
   *     constructor; the other methods use the file-level `fastTextModule`.
   */
  constructor(fastTextModule) {
    this.f = new fastTextModule.FastText();
  }

  /**
   * loadModel
   *
   * Loads the model file from the specified url, and returns the
   * corresponding `FastTextModel` object.
   *
   * @param {string} url
   *     the url of the model file.
   *
   * @return {Promise}   promise object that resolves to a `FastTextModel`;
   *     it rejects if fetching, writing or loading the model fails.
   *
   */
  async loadModel(url) {
    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
    // Capture the native object up front so the model that gets loaded is
    // the one that belonged to this instance when the call was made.
    const fastTextNative = this.f;
    const response = await fetchFunc(url);
    const bytes = await response.arrayBuffer();
    fastTextModule.FS.writeFile(modelFileInWasmFs, new Uint8Array(bytes));
    fastTextNative.loadModel(modelFileInWasmFs);
    return new FastTextModel(fastTextNative);
  }

  /**
   * loadModelBinary
   *
   * Loads a model from bytes that are already in memory.
   *
   * @param {ArrayBuffer} buffer
   *     the contents of a model file.
   *
   * @return {FastTextModel} the loaded model
   *
   */
  loadModelBinary(buffer) {
    const fastTextNative = this.f;
    fastTextModule.FS.writeFile(modelFileInWasmFs, new Uint8Array(buffer));
    fastTextNative.loadModel(modelFileInWasmFs);
    return new FastTextModel(fastTextNative);
  }

  /**
   * _train
   *
   * Downloads the training file from `url`, copies the recognised entries
   * of `kwargs` onto a fresh `Args` object and runs the native training.
   *
   * @param {string} url
   *     the url of the input file.
   *
   * @param {string} modelName
   *     key looked up in `fastTextModule.ModelName`.
   *
   * @param {dict} kwargs
   *     train parameters; keys that are not recognised are ignored.
   *
   * @param {function} callback
   *     train callback function, passed through to the native `train`.
   *
   * @return {Promise}   promise object that resolves to a `FastTextModel`
   *
   */
  async _train(url, modelName, kwargs = {}, callback = null) {
    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
    const fastTextNative = this.f;
    const response = await fetchFunc(url);
    const bytes = await response.arrayBuffer();
    fastTextModule.FS.writeFile(trainFileInWasmFs, new Uint8Array(bytes));

    const argsList = ['lr', 'lrUpdateRate', 'dim', 'ws', 'epoch',
      'minCount', 'minCountLabel', 'neg', 'wordNgrams', 'loss',
      'model', 'bucket', 'minn', 'maxn', 't', 'label', 'verbose',
      'pretrainedVectors', 'saveOutput', 'seed', 'qout', 'retrain',
      'qnorm', 'cutoff', 'dsub', 'autotuneValidationFile',
      'autotuneMetric', 'autotunePredictions', 'autotuneDuration',
      'autotuneModelSize'];
    const args = new fastTextModule.Args();
    for (const key of argsList) {
      if (key in kwargs) {
        args[key] = kwargs[key];
      }
    }
    // `model` and `loss` are always overwritten here, so whatever the loop
    // above copied for those two keys does not survive.
    args.model = fastTextModule.ModelName[modelName];
    // NOTE(review): the fallback is the plain string 'hs' while the other
    // branch yields a `LossName` entry; confirm the native side accepts both.
    args.loss = ('loss' in kwargs) ?
      fastTextModule.LossName[kwargs['loss']] : 'hs';
    args.thread = 1;
    args.input = trainFileInWasmFs;
    fastTextNative.train(args, callback);
    return new FastTextModel(fastTextNative);
  }

  /**
   * trainSupervised
   *
   * Downloads the input file from the specified url, trains a supervised
   * model and returns a `FastTextModel` object.
   *
   * @param {string} url
   *     the url of the input file.
   *     The input file must contain at least one label per line. For an
   *     example consult the example datasets which are part of the fastText
   *     repository such as the dataset pulled by classification-example.sh.
   *
   * @param {dict} kwargs
   *     train parameters.
   *     For example {'lr': 0.5, 'epoch': 5}
   *
   * @param {function} callback
   *     train callback function
   *     `callback` function is called regularly from the train loop:
   *     `callback(progress, loss, wordsPerSec, learningRate, eta)`
   *
   * @return {Promise}   promise object that resolves to a `FastTextModel`
   *
   */
  trainSupervised(url, kwargs = {}, callback) {
    return this._train(url, 'supervised', kwargs, callback);
  }

  /**
   * trainUnsupervised
   *
   * Downloads the input file from the specified url, trains an unsupervised
   * model and returns a `FastTextModel` object.
   *
   * @param {string} url
   *     the url of the input file.
   *     The input file must not contain any labels or use the specified label
   *     prefix unless it is ok for those words to be ignored. For an example
   *     consult the dataset pulled by the example script word-vector-example.sh
   *     which is part of the fastText repository.
   *
   * @param {string} modelName
   *     Model to be used for unsupervised learning. `cbow` or `skipgram`.
   *
   * @param {dict} kwargs
   *     train parameters.
   *     For example {'lr': 0.5, 'epoch': 5}
   *
   * @param {function} callback
   *     train callback function
   *     `callback` function is called regularly from the train loop:
   *     `callback(progress, loss, wordsPerSec, learningRate, eta)`
   *
   * @return {Promise}   promise object that resolves to a `FastTextModel`
   *
   */
  trainUnsupervised(url, modelName, kwargs = {}, callback) {
    return this._train(url, modelName, kwargs, callback);
  }
}
class FastTextModel {
  /**
   * `FastTextModel` represents a trained model.
   *
   * @constructor
   *
   * @param {object}       fastTextNative
   *     webassembly object that makes the bridge between js and C++
   */
  constructor(fastTextNative) {
    this.f = fastTextNative;
  }

  /**
   * isQuant
   *
   * @return {bool}   true if the model is quantized
   *
   */
  isQuant() {
    return this.f.isQuant;
  }

  /**
   * getDimension
   *
   * @return {int}    the dimension (size) of a lookup vector (hidden layer)
   *
   */
  getDimension() {
    return this.f.args.dim;
  }

  /**
   * getWordVector
   *
   * @param {string}          word
   *
   * @return {Float32Array}   the vector representation of `word`.
   *
   */
  getWordVector(word) {
    const b = getFloat32ArrayFromHeap(this.getDimension());
    this.f.getWordVector(b, word);
    return heapToFloat32(b);
  }

  /**
   * getSentenceVector
   *
   * @param {string}          text
   *     a single line of text; it must not contain a newline.
   *
   * @return {Float32Array}   the vector representation of `text`.
   *
   * @throws {Error} if `text` contains a newline character.
   *
   */
  getSentenceVector(text) {
    if (text.includes('\n')) {
      throw new Error(
        "sentence vector processes one line at a time (remove '\\n')");
    }
    // The native call is handed exactly one newline-terminated line.
    const line = text + '\n';
    const b = getFloat32ArrayFromHeap(this.getDimension());
    this.f.getSentenceVector(b, line);
    return heapToFloat32(b);
  }

  /**
   * getNearestNeighbors
   *
   * returns the nearest `k` neighbors of `word`.
   *
   * @param {string}          word
   * @param {int}             k
   *
   * @return {Array.<Pair.<number, string>>}
   *     words and their corresponding cosine similarities.
   *
   */
  getNearestNeighbors(word, k = 10) {
    return this.f.getNN(word, k);
  }

  /**
   * getAnalogies
   *
   * returns the nearest `k` neighbors of the operation
   * `wordA - wordB + wordC`.
   *
   * @param {string}          wordA
   * @param {string}          wordB
   * @param {string}          wordC
   * @param {int}             k
   *
   * @return {Array.<Pair.<number, string>>}
   *     words and their corresponding cosine similarities
   *
   */
  getAnalogies(wordA, wordB, wordC, k) {
    // The native method takes `k` first, unlike this wrapper.
    return this.f.getAnalogies(k, wordA, wordB, wordC);
  }

  /**
   * getWordId
   *
   * Given a word, get the word id within the dictionary.
   * Returns -1 if word is not in the dictionary.
   *
   * @return {int}    word id
   *
   */
  getWordId(word) {
    return this.f.getWordId(word);
  }

  /**
   * getSubwordId
   *
   * Given a subword, return the index (within input matrix) it hashes to.
   *
   * @return {int}    subword id
   *
   */
  getSubwordId(subword) {
    return this.f.getSubwordId(subword);
  }

  /**
   * getSubwords
   *
   * returns the subwords and their indices.
   *
   * @param {string}          word
   *
   * @return {Pair.<Array.<string>, Array.<int>>}
   *     words and their corresponding indices
   *
   */
  getSubwords(word) {
    return this.f.getSubwords(word);
  }

  /**
   * getInputVector
   *
   * Given an index, get the corresponding vector of the Input Matrix.
   *
   * @param {int}             ind
   *
   * @return {Float32Array}   the vector of the `ind`'th index
   *
   */
  getInputVector(ind) {
    const b = getFloat32ArrayFromHeap(this.getDimension());
    this.f.getInputVector(b, ind);
    return heapToFloat32(b);
  }

  /**
   * predict
   *
   * Given a string, get a list of labels and a list of corresponding
   * probabilities. k controls the number of returned labels.
   *
   * @param {string}          text
   * @param {int}             k, the number of predictions to be returned
   * @param {number}          probability threshold
   *
   * @return {Array.<Pair.<number, string>>}
   *     labels and their probabilities
   *
   */
  predict(text, k = 1, threshold = 0.0) {
    return this.f.predict(text, k, threshold);
  }

  /**
   * getInputMatrix
   *
   * Get a reference to the full input matrix of a Model. This only
   * works if the model is not quantized.
   *
   * @return {DenseMatrix}
   *     densematrix with functions: `rows`, `cols`, `at(i,j)`
   *
   * @throws {Error} if the model is quantized.
   *
   * example:
   *     let inputMatrix = model.getInputMatrix();
   *     let value = inputMatrix.at(1, 2);
   */
  getInputMatrix() {
    if (this.isQuant()) {
      throw new Error("Can't get quantized Matrix");
    }
    return this.f.getInputMatrix();
  }

  /**
   * getOutputMatrix
   *
   * Get a reference to the full output matrix of a Model. This only
   * works if the model is not quantized.
   *
   * @return {DenseMatrix}
   *     densematrix with functions: `rows`, `cols`, `at(i,j)`
   *
   * @throws {Error} if the model is quantized.
   *
   * example:
   *     let outputMatrix = model.getOutputMatrix();
   *     let value = outputMatrix.at(1, 2);
   */
  getOutputMatrix() {
    if (this.isQuant()) {
      throw new Error("Can't get quantized Matrix");
    }
    return this.f.getOutputMatrix();
  }

  /**
   * getWords
   *
   * Get the entire list of words of the dictionary including the frequency
   * of the individual words. This does not include any subwords. For that
   * please consult the function `getSubwords`.
   *
   * @return {Pair.<Array.<string>, Array.<int>>}
   *     words and their corresponding frequencies
   *
   */
  getWords() {
    return this.f.getWords();
  }

  /**
   * getLabels
   *
   * Get the entire list of labels of the dictionary including the frequency
   * of the individual labels.
   *
   * @return {Pair.<Array.<string>, Array.<int>>}
   *     labels and their corresponding frequencies
   *
   */
  getLabels() {
    return this.f.getLabels();
  }

  /**
   * getLine
   *
   * Split a line of text into words and labels. Labels must start with
   * the prefix used to create the model (__label__ by default).
   *
   * @param {string}          text
   *
   * @return {Pair.<Array.<string>, Array.<string>>}
   *     words and labels
   *
   */
  getLine(text) {
    return this.f.getLine(text);
  }

  /**
   * saveModel
   *
   * Saves the model file in web assembly in-memory FS and returns a blob
   *
   * @return {Blob}           blob data of the file saved in web assembly FS,
   *     typed as `application/octet-stream`
   *
   */
  saveModel() {
    this.f.saveModel(modelFileInWasmFs);
    const content = fastTextModule.FS.readFile(modelFileInWasmFs,
      { encoding: 'binary' });
    // Copy the bytes so the blob does not depend on the filesystem's buffer.
    return new Blob(
      [new Uint8Array(content)],
      { type: 'application/octet-stream' }
    );
  }

  /**
   * test
   *
   * Downloads the test file from the specified url, evaluates the supervised
   * model with it.
   *
   * @param {string}          url
   * @param {int}             k, the number of predictions to be returned
   * @param {number}          probability threshold
   *
   * @return {Promise}   promise object that resolves to a `Meter` object;
   *     it rejects if fetching, writing or evaluating fails.
   *
   * example:
   * model.test("/absolute/url/to/test.txt", 1, 0.0).then((meter) => {
   *     console.log(meter.precision);
   *     console.log(meter.recall);
   *     console.log(meter.f1Score);
   *     console.log(meter.nexamples());
   * });
   *
   */
  async test(url, k, threshold) {
    const fetchFunc = (thisModule && thisModule.fetch) || fetch;
    const fastTextNative = this.f;
    const response = await fetchFunc(url);
    const bytes = await response.arrayBuffer();
    fastTextModule.FS.writeFile(testFileInWasmFs, new Uint8Array(bytes));
    return fastTextNative.test(testFileInWasmFs, k, threshold);
  }
}

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -1,44 +0,0 @@
# Version of this schema
schema: 1
bugzilla:
# Bugzilla product and component for this directory and subdirectories
product: Firefox
component: Translation
# Document the source of externally hosted code
origin:
# Short name of the package/library
name: fasttext
description: The JavaScript emscripten worker to run fastText
# Full URL for the package's homepage/etc
# Usually different from repository url
url: https://github.com/facebookresearch/fastText
# Human-readable identifier for this version/release
# Generally "version NNN", "tag SSS", "bookmark SSS"
release: v0.9.2
# Revision to pull in
# Must be a long or short commit SHA (long preferred)
revision: 3697152e0fd772d9185697fdbd4a1d340ca5571d
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/
# Multiple licenses can be specified (as a YAML list)
# A "LICENSE" file must exist containing the full license text
license: MIT
notes: >
This code was generated from the fastText repository on the following revision:
3697152e0fd772d9185697fdbd4a1d340ca5571d
https://github.com/facebookresearch/fastText
There are detailed instructions in the Firefox Source Docs on how to build these
dependencies locally.
https://firefox-source-docs.mozilla.org/toolkit/components/translations/resources/02_contributing.html#building-fasttext

Просмотреть файл

@ -4,8 +4,6 @@
toolkit.jar:
content/global/translations/bergamot-translator.js (bergamot-translator/bergamot-translator.js)
content/global/translations/fasttext.js (fasttext/fasttext.js)
content/global/translations/fasttext_wasm.js (fasttext/fasttext_wasm.js)
content/global/translations/simd-detect-worker.js (content/simd-detect-worker.js)
content/global/translations/translations-document.sys.mjs (content/translations-document.sys.mjs)
content/global/translations/translations-engine.html (content/translations-engine.html)

Просмотреть файл

@ -3657,8 +3657,6 @@ SOFTWARE.
<li><code>third_party/js/cfworker/json-schema.js</code></li>
<li><code>security/nss/lib/freebl/ecl/ecp_secp384r1.c</code> and
<code>security/nss/lib/freebl/ecl/ecp_secp521r1.c</code></li>
<li><code>toolkit/components/translations/fasttext/fasttext.js</code> and
<code>toolkit/components/translations/fasttext/fasttext_wasm.js</code></li>
<li><code>security/nss/lib/freebl/ecl/curve25519_32.c</code>,
<code>security/nss/lib/freebl/ecl/ecp_secp384r1.c</code> and
<code>security/nss/lib/freebl/ecl/ecp_secp521r1.c</code></li>

Просмотреть файл

@ -180,8 +180,6 @@ toolkit/components/passwordmgr/PasswordRulesParser.sys.mjs
toolkit/components/protobuf/
toolkit/components/translation/cld2/
toolkit/components/translations/bergamot-translator
toolkit/components/translations/fasttext/fasttext.js
toolkit/components/translations/fasttext/fasttext_wasm.js
toolkit/components/url-classifier/chromium/
toolkit/components/utils/mozjexl.js
toolkit/components/viaduct/fetch_msg_types.pb.cc