Mirror of https://github.com/microsoft/archai.git
Fix gradual warmup; enable dataset-specific toy mode batch sizes; ImageNet toy mode working; disable decay_bn for now
Parent: 74e34ff8a9
Commit: 38f921cded
@@ -62,7 +62,7 @@
             "args": ["--algos", "darts", "--datasets", "food101"]
         },
         {
-            "name": "Darts-ImageNet-Toy",
+            "name": "Darts-ImageNet-Eval-Toy",
             "type": "python",
             "request": "launch",
             "program": "${cwd}/scripts/main.py",
@@ -5,7 +5,6 @@ import math
 import torch
 from torch import nn
 from torch.optim import lr_scheduler, SGD, Adam
-from warmup_scheduler import GradualWarmupScheduler
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.optim.optimizer import Optimizer
 from torch.nn.modules.loss import _WeightedLoss, _Loss
@@ -16,6 +15,7 @@ import statopt
 from .config import Config
 from .cocob import CocobBackprop
 from .ml_losses import SmoothCrossEntropyLoss
+from .warmup_scheduler import GradualWarmupScheduler


 def create_optimizer(conf_opt:Config, params)->Optimizer:
@@ -0,0 +1,67 @@
+# Credits: https://github.com/ildoonet/pytorch-gradual-warmup-lr
+
+from torch.optim.lr_scheduler import _LRScheduler
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+
+
+class GradualWarmupScheduler(_LRScheduler):
+    """ Gradually warm-up(increasing) learning rate in optimizer.
+    Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'.
+
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        multiplier: target learning rate = base lr * multiplier if multiplier > 1.0. if multiplier = 1.0, lr starts from 0 and ends up with the base_lr.
+        total_epoch: target learning rate is reached at total_epoch, gradually
+        after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau)
+    """
+
+    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
+        self.multiplier = multiplier
+        if self.multiplier < 1.:
+            raise ValueError(f'multiplier should be >= 1 but was {self.multiplier}')
+        self.total_epoch = total_epoch
+        self.after_scheduler = after_scheduler
+        self.finished = False
+        super(GradualWarmupScheduler, self).__init__(optimizer)
+
+    def get_lr(self):
+        if self.last_epoch > self.total_epoch:
+            if self.after_scheduler:
+                if not self.finished:
+                    self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
+                    self.finished = True
+                return self.after_scheduler.get_lr()
+            return [base_lr * self.multiplier for base_lr in self.base_lrs]
+
+        if self.multiplier == 1.0:
+            return [base_lr * (float(self.last_epoch) / self.total_epoch) for base_lr in self.base_lrs]
+        else:
+            return [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]
+
+    def step_ReduceLROnPlateau(self, metrics, epoch=None):
+        if epoch is None:
+            epoch = self.last_epoch + 1
+        self.last_epoch = epoch if epoch != 0 else 1  # ReduceLROnPlateau is called at the end of epoch, whereas others are called at beginning
+        if self.last_epoch <= self.total_epoch:
+            warmup_lr = [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]
+            for param_group, lr in zip(self.optimizer.param_groups, warmup_lr):
+                param_group['lr'] = lr
+        else:
+            if epoch is None:
+                self.after_scheduler.step(metrics, None)
+            else:
+                self.after_scheduler.step(metrics, epoch - self.total_epoch)
+
+    def step(self, epoch=None, metrics=None):
+        if type(self.after_scheduler) != ReduceLROnPlateau:
+            if self.finished and self.after_scheduler:
+                if epoch is None:
+                    self.after_scheduler.step(None)
+                else:
+                    self.after_scheduler.step(epoch - self.total_epoch)
+                self._last_lr = self.after_scheduler.get_last_lr()
+            else:
+                return super(GradualWarmupScheduler, self).step(epoch)
+        else:
+            self.step_ReduceLROnPlateau(metrics, epoch)
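Note: a minimal sketch of how this warmup wrapper is typically driven. The lr/momentum/decay, the 8 warmup epochs, multiplier 1.0, and the cosine hand-off mirror the config changes further down; the stand-in model, the total epoch count, and the top-level import path are illustrative assumptions (inside the repo it is imported relatively, as shown in the hunk above).

from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from warmup_scheduler import GradualWarmupScheduler  # assumed: the new module above is on the import path

model = nn.Linear(10, 2)  # stand-in model, not from the repo
optimizer = SGD(model.parameters(), lr=2.048, momentum=0.875, weight_decay=3.05e-5)

# warm up for 8 epochs: with multiplier=1.0 the LR ramps from 0 up to the base LR (2.048),
# then GradualWarmupScheduler hands over to the cosine schedule (min_lr=0.0)
cosine = CosineAnnealingLR(optimizer, T_max=242, eta_min=0.0)  # T_max is an illustrative number
scheduler = GradualWarmupScheduler(optimizer, multiplier=1.0, total_epoch=8,
                                   after_scheduler=cosine)

for epoch in range(250):  # total epoch count is illustrative
    # ... train for one epoch, calling optimizer.step() per batch ...
    scheduler.step()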
@@ -10,7 +10,10 @@ common:
   checkpoint:
     filename: '$expdir/checkpoint.pth'
     freq: 10
   toy_mode: # this section will be used by toy.yaml to setup the toy mode
     max_batches: 4
+    train_batch: 32
+    test_batch: 64
+    # TODO: workers setting

   # redis address of Ray cluster. Use None for single node run
@@ -3,6 +3,10 @@ common:
   seed: 0.0
   apex:
     loss_scale: "128" # loss scaling mode for mixed prec, must be a string representing a float or "dynamic"
+  toy_mode: # this section will be used by toy.yaml to setup the toy mode
+    max_batches: 25
+    train_batch: 64
+    test_batch: 64

 dataset_eval:
   name: 'imagenet'
@@ -63,11 +67,11 @@ nas:
   optimizer:
     lr: 2.048 # init learning rate
     decay: 3.05e-5
-    decay_bn: 0.0 # if NaN then same as decay otherwise apply different decay to BN layers
+    decay_bn: .NaN # if .NaN then same as decay otherwise apply different decay to BN layers
     momentum: 0.875 # pytorch default is 0.0
   lr_schedule:
     type: 'cosine'
     min_lr: 0.0 # min learning rate to be set in eta_min param of scheduler
     warmup: # increases LR from 0 to current in specified epochs and then hands over to main scheduler
-      multiplier: 1
+      multiplier: 1.0
       epochs: 8
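Note: decay_bn refers to the common pattern of giving BatchNorm parameters a separate weight decay; setting it to .NaN means BN parameters keep the same decay as everything else, which is why the commit message says decay_bn is disabled for now. A rough sketch of that pattern, not the repo's actual create_optimizer, with illustrative names and the config's values as defaults:

import math
from torch import nn
from torch.optim import SGD

def make_sgd(model: nn.Module, lr=2.048, momentum=0.875,
             decay=3.05e-5, decay_bn=float('nan')):
    """Illustrative helper: `decay` for regular weights, `decay_bn` for
    BatchNorm parameters; NaN means 'same as decay', matching the comment above."""
    if math.isnan(decay_bn):
        # single param group, uniform weight decay
        return SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=decay)
    bn_params, other_params = [], []
    for m in model.modules():
        bucket = bn_params if isinstance(m, nn.modules.batchnorm._BatchNorm) else other_params
        bucket.extend(p for p in m.parameters(recurse=False) if p.requires_grad)
    # two param groups with different weight decay for BN vs everything else
    return SGD([{'params': other_params, 'weight_decay': decay},
                {'params': bn_params, 'weight_decay': decay_bn}],
               lr=lr, momentum=momentum)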
@@ -17,29 +17,32 @@ nas:
   trainer:
     epochs: 0 # number of epochs model will be trained before search
   loader:
-    train_batch: 32
-    test_batch: 64
+    train_batch: '_copy: common/toy_mode/train_batch'
+    test_batch: '_copy: common/toy_mode/test_batch'
     dataset:
-      max_batches: 4
+      max_batches: '_copy: common/toy_mode/max_batches'
   post_train:
     trainer:
       epochs: 1
     loader:
-      train_batch: 32
-      test_batch: 64
+      train_batch: '_copy: common/toy_mode/train_batch'
+      test_batch: '_copy: common/toy_mode/test_batch'
       dataset:
-        max_batches: 4
+        max_batches: '_copy: common/toy_mode/max_batches'
   model_desc:
     n_reductions: 1 # number of reductions to be applied
     n_cells: 3 # number of cells
     n_nodes: 2 # number of nodes in a cell
   loader:
-    train_batch: 32
-    test_batch: 64
+    train_batch: '_copy: common/toy_mode/train_batch'
+    test_batch: '_copy: common/toy_mode/test_batch'
     dataset:
-      max_batches: 4
+      max_batches: '_copy: common/toy_mode/max_batches'
   trainer:
     epochs: 1
     logger_freq: 1
     validation:
       logger_freq: 1
 eval:
   data_parallel: False
   checkpoint: null
@@ -48,9 +51,13 @@ nas:
     n_nodes: 4 # number of nodes in a cell
     n_reductions: 2 # number of reductions to be applied
   loader:
-    train_batch: 32
-    test_batch: 64
+    train_batch: '_copy: common/toy_mode/train_batch'
+    test_batch: '_copy: common/toy_mode/test_batch'
     dataset:
-      max_batches: 2
+      max_batches: '_copy: common/toy_mode/max_batches'
   trainer:
     epochs: 1
     logger_freq: 1
     validation:
       logger_freq: 1
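Note: the '_copy: <path>' strings are config indirections; the loader substitutes the value found at the referenced path, so the toy-mode batch sizes and max_batches are set once per dataset in common/toy_mode and reused everywhere above. A simplified sketch of that substitution over a plain nested dict, not archai's actual Config implementation:

def resolve_copies(node, root):
    """Recursively replace '_copy: some/path' strings with the value found
    at that path in the root config (illustrative, simplified)."""
    if isinstance(node, dict):
        return {k: resolve_copies(v, root) for k, v in node.items()}
    if isinstance(node, str) and node.startswith('_copy:'):
        value = root
        for key in node.split(':', 1)[1].strip().split('/'):
            value = value[key]
        return value
    return node

conf = {
    'common': {'toy_mode': {'max_batches': 4, 'train_batch': 32, 'test_batch': 64}},
    'nas': {'eval': {'loader': {'train_batch': '_copy: common/toy_mode/train_batch'}}},
}
conf = resolve_copies(conf, conf)
assert conf['nas']['eval']['loader']['train_batch'] == 32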