Fix and improve language detection (#416)

* Fix and improve language detection
* Fixed linter
* Pass language defined in the html tag to bgscript
* Validate score
This commit is contained in:
Andre Natal 2022-06-28 01:58:58 -07:00 коммит произвёл GitHub
Родитель 5e188d5d9c
Коммит 4ff61b44e0
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файла: 21 добавление и 8 удалений

Просмотреть файл

@ -74,6 +74,7 @@ const translateAsBrowseMap = new Map();
let isMochitest = false;
const languageModelFileTypes = ["model", "lex", "vocab", "qualityModel", "srcvocab", "trgvocab"];
const CACHE_NAME = "fxtranslations";
const FT_SCORE_THRESHOLD = 0.75;
const init = () => {
Sentry.wrap(async () => {
@ -140,13 +141,25 @@ const messageListener = function(message, sender) {
}
/*
* call the cld experiment to detect the language of the snippet
* call fasttext to detect the language of the snippet
* extracted from the page
*/
let pageLanguage = modelFastText
.predict(message.languageDetection.wordsToDetect.trim().replace(/(\r\n|\n|\r)/gm, ""), 1, 0.0)
.get(0)[1]
.replace("__label__", "");
const cleanedWords = message.languageDetection.wordsToDetect
.toLowerCase()
.trim()
.replace(/(\r\n|\n|\r)/gm, " ");
const [score, ftLanguage] = modelFastText
.predict(cleanedWords, 1, 0.0)
.get(0);
let pageLanguage = "";
if (score > FT_SCORE_THRESHOLD) {
pageLanguage = ftLanguage.replace("__label__", "");
} else if (message.languageDetection.htmlElementLanguage.length > 0) {
pageLanguage = message.languageDetection.htmlElementLanguage.substring(0,2);
} else {
break;
}
/*
* language detector returns "no" for Norwegian Bokmål ("nb")

Просмотреть файл

@ -22,7 +22,8 @@ class LanguageDetection {
supported
? document.body.innerText.substring(0, 4096)
: "";
return { supported, wordsToDetect };
const htmlElementLanguage = document.documentElement?.lang;
return { supported, wordsToDetect, htmlElementLanguage };
}
/*
@ -33,7 +34,7 @@ class LanguageDetection {
}
/*
* return if the page mets the conditiions to display
* return if the page meets the conditions to display
* or not the translation bar
*/
shouldDisplayTranslation() {
@ -44,7 +45,6 @@ class LanguageDetection {
return this.isLangMismatch() &&
this.languagePairsSupportedSet.has(from) &&
this.languagePairsSupportedSet.has(to);
}
/*