Mirror of https://github.com/microsoft/archai.git
Allow optional warmup, switch to NVidia settings for imagenet
This commit is contained in:
Parent: f8e7d141da
Commit: 446448c69c
@@ -124,11 +124,11 @@ def create_lr_scheduler(conf_lrs:Config, epochs:int, optimizer:Optimizer,
         raise ValueError('invalid lr_schduler=%s' % lr_scheduler_type)
 
     # select warmup for LR schedule
-    if conf_lrs.get_val('warmup', None):
+    if warmup_epochs:
         scheduler = GradualWarmupScheduler(
             optimizer,
             multiplier=conf_lrs['warmup'].get_val('multiplier', 1.0),
-            total_epoch=conf_lrs['warmup']['epochs'],
+            total_epoch=warmup_epochs,
             after_scheduler=scheduler
         )
 
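Note: a minimal sketch of how the rewritten guard composes warmup with a main scheduler. Assumptions: GradualWarmupScheduler has the signature the hunk above uses (optimizer, multiplier, total_epoch, after_scheduler) — archai vendors its own copy; the import below is from the equivalent pypi package pytorch-gradual-warmup-lr — and the model/optimizer/epoch values are stand-ins, not archai's.

import torch
from torch.optim.lr_scheduler import CosineAnnealingLR
# assumption: same API as the GradualWarmupScheduler used in the hunk above
from warmup_scheduler import GradualWarmupScheduler

model = torch.nn.Linear(10, 2)  # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=2.048, momentum=0.875)

epochs, warmup_epochs = 90, 8  # mirrors the NVidia config further down
scheduler = CosineAnnealingLR(optimizer, T_max=epochs, eta_min=0.0)

if warmup_epochs:  # 0 is falsy, so warmup is skipped entirely, as in the new guard
    scheduler = GradualWarmupScheduler(
        optimizer,
        multiplier=1.0,              # end warmup at 1x the configured LR
        total_epoch=warmup_epochs,   # ramp LR from 0 over these epochs
        after_scheduler=scheduler)   # then hand over to the cosine schedule

for epoch in range(epochs):
    # train_one_epoch(...)
    scheduler.step()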
@@ -108,9 +108,9 @@ nas:
       lr_schedule:
         type: 'cosine'
         min_lr: 0.001 # min learning rate to se bet in eta_min param of scheduler
-        warmup: null # increases LR for 0 to current in specified epochs and then hands over to main scheduler
-          # multiplier: 1 # end warmup at this multiple of LR
-          # epochs: 1
+        warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
+          multiplier: 1
+          epochs: 0 # 0 disables warmup
     validation:
       title: 'eval_test'
       logger_freq: 0
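Note: a hedged sketch of why this config shape pairs with the new `if warmup_epochs:` guard — the plain dict below merely mimics archai's Config accessor (an assumption, not the real class), so both `epochs: 0` and an absent warmup section disable warmup without deleting the section from the YAML.

# hypothetical stand-in for a parsed conf_lrs section
conf_lrs = {
    'type': 'cosine',
    'min_lr': 0.001,
    'warmup': {'multiplier': 1, 'epochs': 0},  # the new-style block above
}

warmup = conf_lrs.get('warmup', None)
warmup_epochs = warmup['epochs'] if warmup else 0

print(bool(warmup_epochs))  # False: epochs=0 now disables warmup,
                            # just like warmup: null did before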
@@ -26,48 +26,48 @@ nas:
         _copy: '/dataset_eval'
 
     # darts setup
-    loader:
-      batch: 128
-      dataset:
-        _copy: '/dataset_eval'
-    trainer:
-      aux_weight: 0.4 # weight for loss from auxiliary towers in test time arch
-      drop_path_prob: 0.0 # probability that given edge will be dropped
-      epochs: 250
-      lossfn: # TODO: this is perhaps reversed for test/train?
-        type: 'CrossEntropyLabelSmooth'
-        smoothing: 0.1 # label smoothing
-      optimizer:
-        lr: 0.1 # init learning rate
-        decay: 3.0e-5
-      lr_schedule:
-        type: 'step'
-        decay_period: 1 # epochs between two learning rate decays
-        gamma: 0.97 # learning rate decay
-
-    # NVidia benchmark setup DGX1_RN50_AMP_90E.sh
-    # Enable amp and distributed 8 GPUs in apex section
     # loader:
-    #   batch: 256
-    #   train_workers: 5
-    #   test_workers: 5
+    #   batch: 128
     #   dataset:
     #     _copy: '/dataset_eval'
     # trainer:
-    #   aux_weight: 0.0 # weight for loss from auxiliary towers in test time arch
+    #   aux_weight: 0.4 # weight for loss from auxiliary towers in test time arch
     #   drop_path_prob: 0.0 # probability that given edge will be dropped
-    #   epochs: 90
+    #   epochs: 250
     #   lossfn: # TODO: this is perhaps reversed for test/train?
     #     type: 'CrossEntropyLabelSmooth'
     #     smoothing: 0.1 # label smoothing
     #   optimizer:
-    #     lr: 2.048 # init learning rate
-    #     decay: 3.05e-5
-    #     decay_bn: 0.0 # if NaN then same as decay otherwise apply different decay to BN layers
-    #     momentum: 0.875 # pytorch default is 0.0
+    #     lr: 0.1 # init learning rate
+    #     decay: 3.0e-5
     #   lr_schedule:
-    #     type: 'cosine'
-    #     min_lr: 0.0 # min learning rate to se bet in eta_min param of scheduler
-    #     warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
-    #       multiplier: 1
-    #       epochs: 8
+    #     type: 'step'
+    #     decay_period: 1 # epochs between two learning rate decays
+    #     gamma: 0.97 # learning rate decay
+
+    # NVidia benchmark setup DGX1_RN50_AMP_90E.sh
+    # Enable amp and distributed 8 GPUs in apex section
+    loader:
+      batch: 256
+      train_workers: 5
+      test_workers: 5
+      dataset:
+        _copy: '/dataset_eval'
+    trainer:
+      aux_weight: 0.0 # weight for loss from auxiliary towers in test time arch
+      drop_path_prob: 0.0 # probability that given edge will be dropped
+      epochs: 90
+      lossfn: # TODO: this is perhaps reversed for test/train?
+        type: 'CrossEntropyLabelSmooth'
+        smoothing: 0.1 # label smoothing
+      optimizer:
+        lr: 2.048 # init learning rate
+        decay: 3.05e-5
+        decay_bn: 0.0 # if NaN then same as decay otherwise apply different decay to BN layers
+        momentum: 0.875 # pytorch default is 0.0
+      lr_schedule:
+        type: 'cosine'
+        min_lr: 0.0 # min learning rate to se bet in eta_min param of scheduler
+        warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
+          multiplier: 1
+          epochs: 8
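Note: the headline `lr: 2.048` follows the linear LR-scaling rule (Goyal et al., 2017). A small sketch, under the assumptions that NVIDIA's published ResNet-50 recipe uses base LR 0.256 at batch 256 and that `batch: 256` above is per GPU across the 8 distributed GPUs the comment mentions:

# linear scaling rule: LR grows proportionally with global batch size.
# Assumption: base lr 0.256 at batch 256, per NVIDIA DeepLearningExamples RN50.
def scaled_lr(base_lr: float = 0.256, base_batch: int = 256,
              batch_per_gpu: int = 256, gpus: int = 8) -> float:
    return base_lr * (batch_per_gpu * gpus) / base_batch

print(scaled_lr())  # 2.048 -> matches 'lr: 2.048' in the config above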