Mirror of https://github.com/microsoft/archai.git
Current version seems to produce stable freeze-training results. More testing runs are in progress.
This commit is contained in:
Parent: 9a54ffb594
Commit: 0d8930aa70
@@ -52,20 +52,20 @@ class FreezeTrainer(ArchTrainer, EnforceOverrides):
             # freeze everything other than the last layer
             self.freeze_but_last_layer()

-            # # reset optimizer
-            # del self._multi_optim
+            # reset optimizer
+            del self._multi_optim

-            # self.conf_optim['lr'] = self.conf_train['proxynas']['freeze_lr']
-            # self.conf_optim['decay'] = self.conf_train['proxynas']['freeze_decay']
-            # self.conf_optim['momentum'] = self.conf_train['proxynas']['freeze_momentum']
-            # self.conf_sched = Config()
-            # self._aux_weight = self.conf_train['proxynas']['aux_weight']
+            self.conf_optim['lr'] = self.conf_train['proxynas']['freeze_lr']
+            self.conf_optim['decay'] = self.conf_train['proxynas']['freeze_decay']
+            self.conf_optim['momentum'] = self.conf_train['proxynas']['freeze_momentum']
+            self.conf_sched = Config()
+            self._aux_weight = self.conf_train['proxynas']['aux_weight']

-            # self.model.zero_grad()
-            # self._multi_optim = self.create_multi_optim(len(train_dl))
-            # # before checkpoint restore, convert to amp
-            # self.model = self._apex.to_amp(self.model, self._multi_optim,
-            #                                batch_size=train_dl.batch_size)
+            self.model.zero_grad()
+            self._multi_optim = self.create_multi_optim(len(train_dl))
+            # before checkpoint restore, convert to amp
+            self.model = self._apex.to_amp(self.model, self._multi_optim,
+                                           batch_size=train_dl.batch_size)

             self._in_freeze_mode = True
             self._epoch_freeze_started = self._metrics.epochs()

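This hunk enables the previously commented-out optimizer reset: when freeze training starts, the old optimizer and its state are discarded and a fresh one is built from the freeze_* hyperparameters. A minimal standalone sketch of the same pattern in plain PyTorch (the helper name and the flat conf dict are illustrative, not Archai's actual API):

# Minimal sketch of the optimizer reset performed above, in plain PyTorch.
# reset_freeze_optimizer and the flat `conf` dict are illustrative only.
import torch

def reset_freeze_optimizer(model: torch.nn.Module, conf: dict) -> torch.optim.Optimizer:
    model.zero_grad()  # drop gradients accumulated before freeze mode
    trainable = [p for p in model.parameters() if p.requires_grad]
    # rebuild SGD from scratch so no stale momentum buffers survive the switch
    return torch.optim.SGD(trainable,
                           lr=conf['freeze_lr'],               # e.g. 0.001
                           momentum=conf['freeze_momentum'],   # e.g. 0.0
                           weight_decay=conf['freeze_decay'])  # e.g. 0.0

Rebuilding rather than reusing the optimizer matters because momentum buffers accumulated during full-network training would otherwise carry over into the freeze phase.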
@@ -85,9 +85,11 @@ class FreezeTrainer(ArchTrainer, EnforceOverrides):

         for name, param in self.model.named_parameters():
             # TODO: Make the layer names to be updated a config value
-            # 'logits_op._op'
-            if 'fc' in name:
-                param.requires_grad = True
+            # 'fc' for resnet18
+            # 'logits_op._op' for darts search space
+            for identifier in self.conf_train['proxynas']['identifiers_to_unfreeze']:
+                if identifier in name:
+                    param.requires_grad = True

         for name, param in self.model.named_parameters():
             if param.requires_grad:
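The loop above replaces a hard-coded 'fc' check with the configurable identifiers_to_unfreeze list. Read in isolation, the intended behavior of freeze_but_last_layer appears to be: disable gradients everywhere, then re-enable them for parameters whose names match a configured identifier. A self-contained sketch under that assumption (the freeze-all step is implied by the method's name and is not shown in this diff):

# Standalone sketch of the freeze/unfreeze pass: disable gradients
# everywhere, then re-enable them for parameters whose names contain
# one of the configured identifiers (e.g. 'fc', 'bn').
import torch

def freeze_but_last_layer(model: torch.nn.Module,
                          identifiers_to_unfreeze: list) -> None:
    for param in model.parameters():
        param.requires_grad = False
    for name, param in model.named_parameters():
        if any(ident in name for ident in identifiers_to_unfreeze):
            param.requires_grad = True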
@@ -26,12 +26,13 @@ nas:
         type: 'cosine'
         min_lr: 0.001 # min learning rate to be set in eta_min param of scheduler
       proxynas:
-        val_top1_acc_threshold: 0.05 # after some accuracy we will shift into training only the last layer
+        val_top1_acc_threshold: 0.80 # after some accuracy we will shift into training only the last layer
         freeze_epochs: 200
         freeze_lr: 0.001
         freeze_decay: 0.0
         freeze_momentum: 0.0
         train_regular: False
+        identifiers_to_unfreeze: ['fc', 'bn']
         aux_weight: 0.0 # disable auxiliary loss part during finetuning

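As a sanity check on the identifiers chosen here, plain substring matching on a torchvision ResNet-18 behaves as the code comments suggest: 'fc' picks up the final classifier, and 'bn' picks up batch-norm parameters whose module names contain 'bn' (the downsample batch-norms are named differently and would not match). A quick check, assuming torchvision is installed and reusing the sketch above:

# Assumes torchvision; reuses the freeze_but_last_layer sketch above.
from torchvision.models import resnet18

model = resnet18()
freeze_but_last_layer(model, ['fc', 'bn'])
trainable = [name for name, p in model.named_parameters() if p.requires_grad]
print(trainable[-2:])  # -> ['fc.weight', 'fc.bias']
print(len(trainable))  # classifier plus matched batch-norm weights/biases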
@@ -12,6 +12,8 @@ nas:
     trainer:
       plotsdir: ''
       epochs: 600
+      aux_weight: 0.0
+      drop_path_prob: 0.0
       proxynas:
         val_top1_acc_threshold: 0.60 # after some accuracy we will shift into training only the last layer
         freeze_epochs: 200
@@ -19,4 +21,5 @@ nas:
         freeze_decay: 0.0
         freeze_momentum: 0.0
         train_regular: False
+        identifiers_to_unfreeze: ['logits_op._op', 'bn']
         aux_weight: 0.0 # disable auxiliary loss part during finetuning
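For the darts search space the final classifier lives under logits_op._op, so the same substring matching unfreezes it there. Both configs also set aux_weight: 0.0, which, per the inline comment, disables the auxiliary-head loss during finetuning. In NASNet/DARTS-style training that loss is conventionally blended into the total as a weighted sum, roughly as follows (a sketch of the convention, not Archai's exact code):

# Conventional NASNet/DARTS-style auxiliary loss blending; with
# aux_weight == 0.0 the auxiliary term drops out entirely.
import torch.nn.functional as F

def combined_loss(logits, aux_logits, targets, aux_weight: float):
    loss = F.cross_entropy(logits, targets)
    if aux_weight > 0.0:
        loss = loss + aux_weight * F.cross_entropy(aux_logits, targets)
    return loss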