Allow optional warmup, switch to NVidia settings for imagenet

This commit is contained in:
Shital Shah 2020-04-23 22:57:45 -07:00
Parent f8e7d141da
Commit 446448c69c
3 changed files with 40 additions and 40 deletions

View file

@@ -124,11 +124,11 @@ def create_lr_scheduler(conf_lrs:Config, epochs:int, optimizer:Optimizer,
         raise ValueError('invalid lr_scheduler=%s' % lr_scheduler_type)
     # select warmup for LR schedule
-    if conf_lrs.get_val('warmup', None):
+    if warmup_epochs:
         scheduler = GradualWarmupScheduler(
             optimizer,
             multiplier=conf_lrs['warmup'].get_val('multiplier', 1.0),
-            total_epoch=conf_lrs['warmup']['epochs'],
+            total_epoch=warmup_epochs,
             after_scheduler=scheduler
         )
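The change above makes warmup optional: the base scheduler is wrapped in GradualWarmupScheduler only when warmup_epochs is non-zero, instead of whenever a warmup section is present. A minimal sketch of that wiring, assuming the third-party pytorch-gradual-warmup-lr package (which provides GradualWarmupScheduler) and a plain dict standing in for the repo's Config class:

# Sketch only: optionally wrap a base LR schedule in a warmup phase.
# Assumes: pip install git+https://github.com/ildoonet/pytorch-gradual-warmup-lr
import torch
from torch.optim.lr_scheduler import CosineAnnealingLR
from warmup_scheduler import GradualWarmupScheduler

def make_scheduler(conf_lrs: dict, epochs: int, optimizer):
    # base schedule: cosine annealing down to min_lr
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs,
                                  eta_min=conf_lrs.get('min_lr', 0.0))
    # warmup is optional: a missing section or epochs == 0 disables it
    warmup = conf_lrs.get('warmup') or {}
    warmup_epochs = warmup.get('epochs', 0)
    if warmup_epochs:
        scheduler = GradualWarmupScheduler(
            optimizer,
            multiplier=warmup.get('multiplier', 1.0),
            total_epoch=warmup_epochs,       # ramp LR up over these epochs
            after_scheduler=scheduler)       # then hand over to the base schedule
    return scheduler

model = torch.nn.Linear(8, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
sched = make_scheduler({'min_lr': 0.001, 'warmup': {'multiplier': 1, 'epochs': 5}},
                       epochs=50, optimizer=opt)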

View file

@@ -108,9 +108,9 @@ nas:
     lr_schedule:
       type: 'cosine'
       min_lr: 0.001 # min learning rate to set in eta_min param of scheduler
-      warmup: null # increases LR from 0 to current in specified epochs and then hands over to main scheduler
-      #   multiplier: 1 # end warmup at this multiple of LR
-      #   epochs: 1
+      warmup: # increases LR from 0 to current in specified epochs and then hands over to main scheduler
+        multiplier: 1
+        epochs: 0 # 0 disables warmup
     validation:
       title: 'eval_test'
       logger_freq: 0
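In the new config the warmup section is always present and epochs: 0 serves as the off switch, replacing the earlier warmup: null. A small self-contained sketch of that convention, using PyYAML purely for illustration (the project itself reads this through its own Config class):

# Sketch: the warmup section stays in the config; epochs: 0 means "no warmup".
import yaml

conf_text = """
lr_schedule:
  type: 'cosine'
  min_lr: 0.001
  warmup:
    multiplier: 1
    epochs: 0   # 0 disables warmup
"""

conf_lrs = yaml.safe_load(conf_text)['lr_schedule']
warmup_epochs = (conf_lrs.get('warmup') or {}).get('epochs', 0)
print('warmup enabled:', bool(warmup_epochs))  # False while epochs == 0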

View file

@@ -26,48 +26,48 @@ nas:
       _copy: '/dataset_eval'
   # darts setup
-  loader:
-    batch: 128
-    dataset:
-      _copy: '/dataset_eval'
-  trainer:
-    aux_weight: 0.4 # weight for loss from auxiliary towers in test time arch
-    drop_path_prob: 0.0 # probability that given edge will be dropped
-    epochs: 250
-    lossfn: # TODO: this is perhaps reversed for test/train?
-      type: 'CrossEntropyLabelSmooth'
-      smoothing: 0.1 # label smoothing
-    optimizer:
-      lr: 0.1 # init learning rate
-      decay: 3.0e-5
-    lr_schedule:
-      type: 'step'
-      decay_period: 1 # epochs between two learning rate decays
-      gamma: 0.97 # learning rate decay
-  # NVidia benchmark setup DGX1_RN50_AMP_90E.sh
-  # Enable amp and distributed 8 GPUs in apex section
   #  loader:
-  #    batch: 256
-  #    train_workers: 5
-  #    test_workers: 5
+  #    batch: 128
   #    dataset:
   #      _copy: '/dataset_eval'
   #  trainer:
-  #    aux_weight: 0.0 # weight for loss from auxiliary towers in test time arch
+  #    aux_weight: 0.4 # weight for loss from auxiliary towers in test time arch
   #    drop_path_prob: 0.0 # probability that given edge will be dropped
-  #    epochs: 90
+  #    epochs: 250
   #    lossfn: # TODO: this is perhaps reversed for test/train?
   #      type: 'CrossEntropyLabelSmooth'
   #      smoothing: 0.1 # label smoothing
   #    optimizer:
-  #      lr: 2.048 # init learning rate
-  #      decay: 3.05e-5
-  #      decay_bn: 0.0 # if NaN then same as decay otherwise apply different decay to BN layers
-  #      momentum: 0.875 # pytorch default is 0.0
+  #      lr: 0.1 # init learning rate
+  #      decay: 3.0e-5
   #    lr_schedule:
-  #      type: 'cosine'
-  #      min_lr: 0.0 # min learning rate to set in eta_min param of scheduler
-  #      warmup: # increases LR from 0 to current in specified epochs and then hands over to main scheduler
-  #        multiplier: 1
-  #        epochs: 8
+  #      type: 'step'
+  #      decay_period: 1 # epochs between two learning rate decays
+  #      gamma: 0.97 # learning rate decay
+  # NVidia benchmark setup DGX1_RN50_AMP_90E.sh
+  # Enable amp and distributed 8 GPUs in apex section
+  loader:
+    batch: 256
+    train_workers: 5
+    test_workers: 5
+    dataset:
+      _copy: '/dataset_eval'
+  trainer:
+    aux_weight: 0.0 # weight for loss from auxiliary towers in test time arch
+    drop_path_prob: 0.0 # probability that given edge will be dropped
+    epochs: 90
+    lossfn: # TODO: this is perhaps reversed for test/train?
+      type: 'CrossEntropyLabelSmooth'
+      smoothing: 0.1 # label smoothing
+    optimizer:
+      lr: 2.048 # init learning rate
+      decay: 3.05e-5
+      decay_bn: 0.0 # if NaN then same as decay otherwise apply different decay to BN layers
+      momentum: 0.875 # pytorch default is 0.0
+    lr_schedule:
+      type: 'cosine'
+      min_lr: 0.0 # min learning rate to set in eta_min param of scheduler
+      warmup: # increases LR from 0 to current in specified epochs and then hands over to main scheduler
+        multiplier: 1
+        epochs: 8
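The block enabled above follows NVIDIA's DGX1_RN50_AMP_90E recipe: batch 256 per GPU, SGD with lr 2.048 and momentum 0.875, weight decay 3.05e-5 with BN parameters excluded (decay_bn: 0.0), label smoothing 0.1, a cosine schedule over 90 epochs down to min_lr 0.0, and an 8-epoch warmup. A rough, self-contained sketch of those hyperparameters in plain PyTorch is below; the ResNet-50 model, the 1-D-parameter heuristic for the BN split, and the third-party GradualWarmupScheduler are illustrative assumptions, and the AMP/8-GPU pieces from the apex section are omitted:

# Sketch of the NVidia-style ImageNet settings from this config in plain PyTorch.
# Model choice, parameter grouping and warmup wrapper are assumptions; AMP and
# distributed training (the 'apex' section) are left out.
import torch
import torchvision
from torch.optim.lr_scheduler import CosineAnnealingLR
from warmup_scheduler import GradualWarmupScheduler  # pytorch-gradual-warmup-lr

epochs, warmup_epochs = 90, 8
model = torchvision.models.resnet50()

# decay_bn: 0.0 -> no weight decay on BatchNorm weights/biases (all 1-D params here)
decay, no_decay = [], []
for p in model.parameters():
    (no_decay if p.ndim == 1 else decay).append(p)

optimizer = torch.optim.SGD(
    [{'params': decay, 'weight_decay': 3.05e-5},
     {'params': no_decay, 'weight_decay': 0.0}],
    lr=2.048, momentum=0.875)

# cosine down to eta_min=0.0 over the run, preceded by an 8-epoch warmup
cosine = CosineAnnealingLR(optimizer, T_max=epochs, eta_min=0.0)
scheduler = GradualWarmupScheduler(optimizer, multiplier=1.0,
                                   total_epoch=warmup_epochs,
                                   after_scheduler=cosine)

# stands in for the repo's CrossEntropyLabelSmooth (needs PyTorch >= 1.10)
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)

The unusually large 2.048 learning rate is presumably scaled for the effective batch of 256 x 8 GPUs = 2048 implied by the "distributed 8 GPUs" comment, which is also why the 8-epoch warmup is part of the recipe.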