Fix and improve language detection (#416)
* Fix and improve language detection * Fixed linter * Pass language defined in the html tag to bgscript * Validate score
This commit is contained in:
Родитель
5e188d5d9c
Коммит
4ff61b44e0
|
@ -74,6 +74,7 @@ const translateAsBrowseMap = new Map();
|
|||
let isMochitest = false;
|
||||
const languageModelFileTypes = ["model", "lex", "vocab", "qualityModel", "srcvocab", "trgvocab"];
|
||||
const CACHE_NAME = "fxtranslations";
|
||||
const FT_SCORE_THRESHOLD = 0.75;
|
||||
|
||||
const init = () => {
|
||||
Sentry.wrap(async () => {
|
||||
|
@ -140,13 +141,25 @@ const messageListener = function(message, sender) {
|
|||
}
|
||||
|
||||
/*
|
||||
* call the cld experiment to detect the language of the snippet
|
||||
* call fasttext to detect the language of the snippet
|
||||
* extracted from the page
|
||||
*/
|
||||
let pageLanguage = modelFastText
|
||||
.predict(message.languageDetection.wordsToDetect.trim().replace(/(\r\n|\n|\r)/gm, ""), 1, 0.0)
|
||||
.get(0)[1]
|
||||
.replace("__label__", "");
|
||||
const cleanedWords = message.languageDetection.wordsToDetect
|
||||
.toLowerCase()
|
||||
.trim()
|
||||
.replace(/(\r\n|\n|\r)/gm, " ");
|
||||
const [score, ftLanguage] = modelFastText
|
||||
.predict(cleanedWords, 1, 0.0)
|
||||
.get(0);
|
||||
let pageLanguage = "";
|
||||
|
||||
if (score > FT_SCORE_THRESHOLD) {
|
||||
pageLanguage = ftLanguage.replace("__label__", "");
|
||||
} else if (message.languageDetection.htmlElementLanguage.length > 0) {
|
||||
pageLanguage = message.languageDetection.htmlElementLanguage.substring(0,2);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* language detector returns "no" for Norwegian Bokmål ("nb")
|
||||
|
|
|
@ -22,7 +22,8 @@ class LanguageDetection {
|
|||
supported
|
||||
? document.body.innerText.substring(0, 4096)
|
||||
: "";
|
||||
return { supported, wordsToDetect };
|
||||
const htmlElementLanguage = document.documentElement?.lang;
|
||||
return { supported, wordsToDetect, htmlElementLanguage };
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -33,7 +34,7 @@ class LanguageDetection {
|
|||
}
|
||||
|
||||
/*
|
||||
* return if the page mets the conditiions to display
|
||||
* return if the page meets the conditions to display
|
||||
* or not the translation bar
|
||||
*/
|
||||
shouldDisplayTranslation() {
|
||||
|
@ -44,7 +45,6 @@ class LanguageDetection {
|
|||
return this.isLangMismatch() &&
|
||||
this.languagePairsSupportedSet.has(from) &&
|
||||
this.languagePairsSupportedSet.has(to);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Загрузка…
Ссылка в новой задаче