зеркало из https://github.com/microsoft/LightGBM.git
118 строки
3.4 KiB
Plaintext
118 строки
3.4 KiB
Plaintext
# task type, support train and predict
|
|
task = train
|
|
|
|
# boosting type, support gbdt for now, alias: boosting, boost
|
|
boosting_type = gbdt
|
|
|
|
# application type; the following applications are supported:
|
|
# regression , regression task
|
|
# binary , binary classification task
|
|
# lambdarank , LambdaRank task
# rank_xendcg , XE_NDCG ranking task (the objective selected in this file)
|
|
# alias: application, app
|
|
objective = rank_xendcg
|
|
|
|
# evaluation metrics; multiple metrics are supported, delimited by ','. The following metrics are supported:
|
|
# l1
|
|
# l2 , default metric for regression
|
|
# ndcg , default metric for lambdarank
|
|
# auc
|
|
# binary_logloss , default metric for binary
|
|
# binary_error
|
|
metric = ndcg
|
|
|
|
# evaluation positions for the ndcg metric, alias: ndcg_at
|
|
ndcg_eval_at = 1,3,5
|
|
|
|
# frequency for metric output
|
|
metric_freq = 1
|
|
|
|
# true if the metric should also be output for the training data, alias: training_metric, train_metric
|
|
is_training_metric = true
|
|
|
|
# column in data to use as label
|
|
label_column = 0
|
|
|
|
# number of bins for feature bucketing; 255 is a recommended setting: it saves memory and still gives good accuracy.
|
|
max_bin = 255
|
|
|
|
# training data
|
|
# if a weight file exists, it should be named "rank.train.weight"
|
|
# if a query file exists, it should be named "rank.train.query"
|
|
# alias: train_data, train
|
|
data = rank.train
|
|
|
|
# validation data, support multi validation data, separated by ','
|
|
# if a weight file exists, it should be named "rank.test.weight"
|
|
# if a query file exists, it should be named "rank.test.query"
|
|
# alias: valid, test, test_data
|
|
valid_data = rank.test
|
|
|
|
# number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
|
|
num_trees = 100
|
|
|
|
# shrinkage rate , alias: shrinkage_rate
|
|
learning_rate = 0.1
|
|
|
|
# number of leaves for one tree, alias: num_leaf
|
|
num_leaves = 31
|
|
|
|
# type of tree learner, support following types:
|
|
# serial , single machine version
|
|
# feature , use feature parallel to train
|
|
# data , use data parallel to train
|
|
# voting , use voting based parallel to train
|
|
# alias: tree
|
|
tree_learner = serial
|
|
|
|
# Set num_threads and objective_seed for stable unit-tests. Comment out otherwise.
|
|
num_threads = 1
|
|
objective_seed = 1025
|
|
|
|
# feature sub-sampling: fraction of features randomly selected for training on each iteration (1.0 uses all features)
|
|
# alias: sub_feature
|
|
feature_fraction = 1.0
|
|
|
|
# Bagging (data sub-sampling) frequency: bagging is performed every bagging_freq iterations (1 means every iteration)
|
|
bagging_freq = 1
|
|
|
|
# Bagging fraction: fraction of the data randomly selected on each bagging round (0.9 selects 90% of the data)
|
|
# alias: sub_row
|
|
bagging_fraction = 0.9
|
|
|
|
# minimal number of data points in one leaf; use this to deal with over-fitting
|
|
# alias : min_data_per_leaf, min_data
|
|
min_data_in_leaf = 50
|
|
|
|
# minimal sum of Hessians in one leaf; use this to deal with over-fitting
|
|
min_sum_hessian_in_leaf = 5.0
|
|
|
|
# saves memory and improves speed for sparse features, alias: is_sparse
|
|
is_enable_sparse = true
|
|
|
|
# set this to true when the data is bigger than the memory size; otherwise, setting it to false is faster
|
|
# alias: two_round_loading, two_round
|
|
use_two_round_loading = false
|
|
|
|
# true to save the data to a binary file, so that the application automatically loads the data from the binary file next time
|
|
# alias: is_save_binary, save_binary
|
|
is_save_binary_file = false
|
|
|
|
# output model file
|
|
output_model = LightGBM_model.txt
|
|
|
|
# supports continued training from a previously trained gbdt model
|
|
# input_model= trained_model.txt
|
|
|
|
# output prediction file for predict task
|
|
# output_result= prediction.txt
|
|
|
|
|
|
# number of machines in distributed training, alias: num_machine
|
|
num_machines = 1
|
|
|
|
# local listening port in distributed training, alias: local_port
|
|
local_listen_port = 12400
|
|
|
|
# machines list file for distributed training, alias: mlist
|
|
machine_list_file = mlist.txt
|