Mirror of https://github.com/microsoft/msrflute.git

Some documentation

Parent: ac2ac8967e
Commit: 59bd382291
@@ -1,5 +1,7 @@
-# Basic configuration file for running locally nlg_gru example using json files.
-model_config:
+# Basic configuration file for running locally nlg_gru example using json in Azure ML.
+
+# Parameters needed to initialize the model
+model_config:
     model_type: GRU
     model_folder: experiments/nlg_gru/model.py
     pretrained_model_path: <add path to pretrained weights here>
@@ -8,105 +10,128 @@ model_config:
     hidden_dim: 512
     OOV_correct: false
 
+# Configuration for differential privacy
 dp_config:
-    enable_local_dp: false
+    enable_local_dp: false # If enabled, the rest of parameters is needed.
+    # enable_local_dp: true # Local dp clips and adds noise on the client and centrally accumulates the privacy budget
+    # eps: 100 # epsilon
+    # max_grad: 0.008 # max gradient
+    # weight_scaler: 0.0001
+    # max_weight: 0.0001 # The max_weight and min_weight should be already scaled by weight_scaler
+    # min_weight: 0.00009 # Because we scale down the weight using weight_scalar -> clip -> add noise -> scale back up.
 
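An editorial note on the dp_config comments above: they describe a scale -> clip -> add noise -> scale back pipeline for client weights. A minimal Python sketch of a step with that shape is below; the function name and the noise_scale parameter are hypothetical (the real noise magnitude would be derived from eps / max_grad), so treat it as an illustration rather than FLUTE's implementation.

import numpy as np

def privatize_weight(weight, weight_scaler=0.0001, max_weight=0.0001,
                     min_weight=0.00009, noise_scale=1e-5):
    # Scale the raw client weight down into the clipping range.
    scaled = weight * weight_scaler
    # Clip in the scaled range; max_weight/min_weight are already scaled values.
    clipped = max(min_weight, min(max_weight, scaled))
    # Add noise to the clipped, scaled value (noise_scale is a stand-in here).
    noisy = clipped + np.random.normal(0.0, noise_scale)
    # Scale back up to the original range.
    return noisy / weight_scaler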
+# Additional privacy metrics
 privacy_metrics_config:
-    apply_metrics: false
+    apply_metrics: false # If enabled, the rest of parameters is needed.
+    # apply_indices_extraction: true # If we extract word indices we want to consider the rank of the words extracted.
+    # allowed_word_rank: 9000 # Any word that rank above this value is considered privacy risk
+    # apply_leakage_metric: true
+    # max_leakage: 30
+    # max_allowed_leakage: 3
+    # adaptive_leakage_threshold: 0.95 # Takes the 95th percentile of the leakage for the next round.
+    # is_leakage_weighted: true
+    # attacker_optimizer_config:
+    #     lr: 0.03
+    #     type: adamax
+    #     amsgrad: false
 
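The commented apply_indices_extraction / allowed_word_rank options above say that words recovered from a client update are judged by their vocabulary rank, and anything ranked above the threshold counts as a privacy risk. A small sketch of that check follows; the helper name is hypothetical and it assumes rank equals vocabulary index with frequent words having low indices, which is only implied by the comment.

def risky_words(extracted_ranks, allowed_word_rank=9000):
    # Words whose rank exceeds the threshold are rare enough to be a privacy risk.
    return [rank for rank in extracted_ranks if rank > allowed_word_rank]

print(risky_words([12, 450, 9500, 15000]))  # -> [9500, 15000]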
+# Determines all the server-side settings for training and evaluation rounds
 server_config:
-    wantRL: false
-    resume_from_checkpoint: true
-    do_profiling: false
-    optimizer_config:
+    wantRL: false # Enable/Disable Reinforcement learning
+    resume_from_checkpoint: true # Resumes from latest checkpoint iteration if available
+    do_profiling: false # Capture profiling information during server updates.
+    optimizer_config: # Configuration for server-side optimizer
         type: lamb
         lr: 0.1
         weight_decay: 0.005
-    annealing_config:
+    annealing_config: # This section configures how the learning rate decays
         type: step_lr
         step_interval: epoch
         gamma: 1.0
         step_size: 100
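For reference, annealing_config with type: step_lr, gamma: 1.0 and step_size: 100 multiplies the learning rate by gamma every step_size steps, so with gamma 1.0 the server lr stays at 0.1. The PyTorch scheduler below reproduces that decay rule; SGD stands in for the LAMB optimizer named above, since this is only an illustration of the schedule, not FLUTE's own wiring.

import torch
from torch.optim.lr_scheduler import StepLR

param = torch.nn.Parameter(torch.zeros(1))           # stand-in model parameter
optimizer = torch.optim.SGD([param], lr=0.1)          # lr matches optimizer_config above
scheduler = StepLR(optimizer, step_size=100, gamma=1.0)

for step in range(300):
    optimizer.step()
    scheduler.step()                                   # gamma=1.0 keeps lr constant
print(optimizer.param_groups[0]["lr"])                 # -> 0.1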
-    val_freq: 2
-    rec_freq: 4
-    initial_val : true
-    initial_freq: false
-    max_iteration: 11
-    num_clients_per_iteration: 10
-    data_config:
-        val:
+    val_freq: 2 # Frequency for validation rounds
+    rec_freq: 4 # Frequency for testing rounds
+    initial_val : true # Enable initial validation round at itr=0
+    initial_freq: false # Enable initial testing round at itr=0
+    max_iteration: 11 # Total number of rounds for FL
+    num_clients_per_iteration: 10 # Number of clients sampled per round
+    data_config: # Server-side data configuration
+        val: # Validation data
             batch_size: 2048
             loader_type: text
             tokenizer_type: not_applicable
             prepend_datapath: false
-            val_data: <add path to data here>
-            vocab_dict: <add path to vocab here>
+            val_data: <add path to data here> # Path for validation data
+            vocab_dict: <add path to vocab here> # Path for vocabulary
             pin_memory: true
-            num_workers: 0
-            num_frames: 2400
+            num_workers: 0 # Indicates how many workers are used for creating batches
+            num_frames: 2400
             max_batch_size: 2048
             max_num_words: 25
             unsorted_batch: true
-        train:
-            batch_size: 128
-            loader_type: text
-            tokenizer_type: not_applicable
-            prepend_datapath: false
-            train_data: null
-            train_data_server: null
-            vocab_dict: <add path to vocab here>
-            pin_memory: true
-            num_workers: 0
-            num_frames: 2400
-            desired_max_samples: 500
-            max_grad_norm: 10.0
-            max_batch_size: 128
-            max_num_words: 25
-            unsorted_batch: true
-        test:
+        # Note this is NOT the main training data configuration, which is configured in the
+        # client config. This section is ignored unless you are running replay data.
+        # If you want to run replay data- set a path name for train_data_server.
+        # train:
+        #     batch_size: 128
+        #     loader_type: text
+        #     tokenizer_type: not_applicable
+        #     prepend_datapath: false
+        #     train_data: null
+        #     train_data_server: null
+        #     vocab_dict: <add path to vocab here>
+        #     pin_memory: true
+        #     num_workers: 0
+        #     num_frames: 2400
+        #     desired_max_samples: 500
+        #     max_grad_norm: 10.0
+        #     max_batch_size: 128
+        #     max_num_words: 25
+        #     unsorted_batch: true
+        test: # Test data configuration
             batch_size: 2048
             loader_type: text
             tokenizer_type: not_applicable
             prepend_datapath: false
             train_data: null
             train_data_server: null
-            test_data: <add path to data here>
-            vocab_dict: <add path to vocab here>
+            test_data: <add path to data here> # Path for validation data
+            vocab_dict: <add path to vocab here> # Path for vocabulary
             pin_memory: true
-            num_workers: 0
+            num_workers: 0 # Indicates how many workers are used for creating batches
             max_batch_size: 2048
             max_num_words: 25
             unsorted_batch: true
     type: model_optimization
-    aggregate_median: softmax
-    weight_train_loss: train_loss
+    aggregate_median: softmax # FL aggregation method
+    weight_train_loss: train_loss # Determines how each client's weight is computed (e.g. grad_mean_loss, train_loss)
     softmax_beta: 20.0
     initial_lr_client: 1.0
     lr_decay_factor: 1.0
-    best_model_criterion: loss
-    fall_back_to_best_model: false
-    server_replay_config:
+    best_model_criterion: loss # Determine the best model based on minimal loss, for checkpointing
+    fall_back_to_best_model: false # If a model degrades, use the previous best model
+    server_replay_config: # This is only applies if the server-side training data is fully configured and loaded
         server_iterations: 50
         optimizer_config:
             type: adam
             lr: 0.00002
             amsgrad: true
 
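On the aggregation settings above: aggregate_median: softmax together with weight_train_loss: train_loss and softmax_beta: 20.0 means each sampled client's contribution is weighted by a softmax over a per-client training statistic. The sketch below shows one plausible reading (lower training loss -> larger weight); the weighting direction and the function itself are assumptions for illustration, not the repository's exact rule.

import numpy as np

def softmax_aggregate(client_updates, client_losses, softmax_beta=20.0):
    # Turn per-client losses into softmax weights; subtract the max for stability.
    scores = -softmax_beta * np.asarray(client_losses, dtype=float)
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()
    # Weighted combination of the client updates (scalars here for brevity).
    return sum(w * u for w, u in zip(weights, client_updates))

print(softmax_aggregate([0.1, 0.2, 0.3], client_losses=[2.0, 1.5, 1.0]))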
+# Dictates the learning parameters for client-side model updates. Train data is defined inside this config.
 client_config:
     meta_learning: basic
     stats_on_smooth_grad: true
     ignore_subtask: false
     num_skips_threshold: 10
     copying_train_data: false
-    do_profiling: false
+    do_profiling: false # Enables client-side training profiling
     data_config:
-        train:
+        train: # This is the main training data configuration
             batch_size: 64
             loader_type: text
             tokenizer_type: not_applicable
             prepend_datapath: false
-            list_of_train_data: <add path to data here>
-            vocab_dict: <add path to vocab here>
+            list_of_train_data: <add path to data here> # Path to training data
+            vocab_dict: <add path to vocab here> # Path to vocabulary
             pin_memory: true
             num_workers: 0
             desired_max_samples: 50000
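Finally, the round bookkeeping implied by server_config (max_iteration, num_clients_per_iteration, val_freq, rec_freq, initial_val, initial_freq) can be pictured as the loop below. This is an editorial sketch of the schedule only; the client pool and the print statements are placeholders, not FLUTE's orchestration code.

import random

max_iteration = 11
num_clients_per_iteration = 10
val_freq, rec_freq = 2, 4
initial_val, initial_freq = True, False
client_pool = list(range(1000))              # hypothetical set of available clients

for rnd in range(max_iteration):
    if rnd == 0:
        if initial_val:
            print("round 0: initial validation round")
        if initial_freq:
            print("round 0: initial testing round")
    sampled = random.sample(client_pool, num_clients_per_iteration)
    # ... each sampled client trains locally, the server aggregates the updates ...
    if rnd > 0 and rnd % val_freq == 0:
        print(f"round {rnd}: validation round")
    if rnd > 0 and rnd % rec_freq == 0:
        print(f"round {rnd}: testing round")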