зеркало из https://github.com/microsoft/LID-tool.git
39 строки
1.3 KiB
INI
39 строки
1.3 KiB
INI
[GENERAL]
|
|
# If verbose is 1, language probabilities are displayed for each word. Default: 1 (on); set to 0 to turn off.
|
|
verbose =
|
|
# default: HINDI
|
|
language_1 =
|
|
# default: ENGLISH
|
|
language_2 =
|
|
|
|
[DEFAULT PATHS]
|
|
# Path to the classifier inside the classifiers folder, default: os.path.join(os.getcwd(), 'classifiers', 'HiEn.classifier')
|
|
CLASSIFIER_PATH =
|
|
# Path to the temporary folder, default: os.path.join(os.getcwd(), 'tmp', '')
|
|
TMP_FILE_PATH =
|
|
# Path to the dictionary folder, default: os.path.join(os.getcwd(), 'dictionaries', '')
|
|
DICT_PATH =
|
|
# Path to the mallet binary, default: os.path.join(os.getcwd(), 'mallet-2.0.8', 'bin', 'mallet')
|
|
MALLET_PATH =
|
|
|
|
[DICTIONARY PROBABILITY VALUES]
|
|
# initialize probability values for the correct and incorrect language
|
|
# default: 0.999999999
|
|
dict_prob_yes =
|
|
# default: 1E-9
|
|
dict_prob_no =
|
|
|
|
[DICTIONARY NAMES]
|
|
# dictionary used to cache already-classified words, both in memory and between runs
|
|
# default: memoize_dict.pkl
|
|
memoize_dict_file =
|
|
|
|
# names of the dictionaries for each language (comma-separated; list as many as needed)
|
|
language_1_dicts = hindict1
|
|
language_2_dicts = eng0dict1, eng1dict1
|
|
|
|
[DICTIONARY HIERARCHY]
|
|
# which files are combined to form which dictionary
|
|
eng0dict1 = dict1goog10k.txt, dict1coca.txt
|
|
eng1dict1 = dict1bigr.txt, dict1text.txt
|
|
hindict1 = dict1hinmov.txt, dict1hi.txt