# LID-tool/config.ini
# (file-viewer metadata: 39 lines, 1.3 KiB, INI)

[GENERAL]
# General options.
# NOTE(review): the values below are intentionally left blank; presumably the
# program reading this file falls back to the default named in each comment
# when a value is empty -- confirm against the reader.
#
# verbose: 1 = display the language probabilities for each word, 0 = turn this off.
# default: 1 (on)
verbose =
# First language.
# default: HINDI
language_1 =
# Second language.
# default: ENGLISH
language_2 =
[DEFAULT PATHS]
# File-system locations. Each default is written as a Python os.path.join(...)
# expression rooted at the current working directory (os.getcwd()).
# NOTE(review): this section is named "DEFAULT PATHS", not "DEFAULT"; if the file
# is read with Python's configparser it is an ordinary section and its keys are
# not inherited by the other sections.
#
# Path to the classifier. The original wording was "classifiers folder", but the
# default ends in 'HiEn.classifier' inside the 'classifiers' folder -- presumably
# a single classifier file; confirm.
# default: os.path.join(os.getcwd(), 'classifiers', 'HiEn.classifier')
CLASSIFIER_PATH =
# Path to the temporary folder (the trailing '' makes the default end with a path separator).
# default: os.path.join(os.getcwd(), 'tmp', '')
TMP_FILE_PATH =
# Path to the dictionary folder (the trailing '' makes the default end with a path separator).
# default: os.path.join(os.getcwd(), 'dictionaries', '')
DICT_PATH =
# Path to mallet. The original wording was "mallet binary folder", but the default
# ends in 'mallet' inside 'mallet-2.0.8/bin' -- presumably the executable itself; confirm.
# default: os.path.join(os.getcwd(), 'mallet-2.0.8', 'bin', 'mallet')
MALLET_PATH =
[DICTIONARY PROBABILITY VALUES]
# Initial probability values for the correct and the incorrect language.
# The two documented defaults add up to 1.
#
# Probability for the correct language -- presumably used when a word is found
# in that language's dictionary; TODO confirm.
# default: 0.999999999
dict_prob_yes =
# Probability for the incorrect language -- presumably used when a word is not
# found in that language's dictionary; TODO confirm.
# default: 1E-9
dict_prob_no =
[DICTIONARY NAMES]
# File name of the dictionary that stores already-classified words, both
# between runs and in memory.
# default: memoize_dict.pkl
memoize_dict_file =
# Names of the dictionaries for each language, as a comma-separated list
# (one or more names per language). Every name listed here has a key of the
# same name in [DICTIONARY HIERARCHY].
language_1_dicts = hindict1
language_2_dicts = eng0dict1, eng1dict1
[DICTIONARY HIERARCHY]
# Which files are combined to form which dictionary, one entry per dictionary:
#   <dictionary name> = <comma-separated list of file names>
# The dictionary names are the ones listed under language_1_dicts and
# language_2_dicts in [DICTIONARY NAMES].
# NOTE(review): the files are presumably looked up in DICT_PATH -- confirm.
eng0dict1 = dict1goog10k.txt, dict1coca.txt
eng1dict1 = dict1bigr.txt, dict1text.txt
hindict1 = dict1hinmov.txt, dict1hi.txt