Current version seems to produce stable freeze training results. More testing runs are in progress.

Debadeepta Dey 2020-12-03 16:40:17 -08:00 committed by Gustavo Rosa
Parent 9a54ffb594
Commit 0d8930aa70
3 changed files with 22 additions and 16 deletions

View file

@@ -52,20 +52,20 @@ class FreezeTrainer(ArchTrainer, EnforceOverrides):
 # freeze everything other than the last layer
 self.freeze_but_last_layer()
-# # reset optimizer
-# del self._multi_optim
+# reset optimizer
+del self._multi_optim
-# self.conf_optim['lr'] = self.conf_train['proxynas']['freeze_lr']
-# self.conf_optim['decay'] = self.conf_train['proxynas']['freeze_decay']
-# self.conf_optim['momentum'] = self.conf_train['proxynas']['freeze_momentum']
-# self.conf_sched = Config()
-# self._aux_weight = self.conf_train['proxynas']['aux_weight']
+self.conf_optim['lr'] = self.conf_train['proxynas']['freeze_lr']
+self.conf_optim['decay'] = self.conf_train['proxynas']['freeze_decay']
+self.conf_optim['momentum'] = self.conf_train['proxynas']['freeze_momentum']
+self.conf_sched = Config()
+self._aux_weight = self.conf_train['proxynas']['aux_weight']
-# self.model.zero_grad()
-# self._multi_optim = self.create_multi_optim(len(train_dl))
-# # before checkpoint restore, convert to amp
-# self.model = self._apex.to_amp(self.model, self._multi_optim,
-#                                batch_size=train_dl.batch_size)
+self.model.zero_grad()
+self._multi_optim = self.create_multi_optim(len(train_dl))
+# before checkpoint restore, convert to amp
+self.model = self._apex.to_amp(self.model, self._multi_optim,
+                               batch_size=train_dl.batch_size)
 self._in_freeze_mode = True
 self._epoch_freeze_started = self._metrics.epochs()
@@ -85,9 +85,11 @@ class FreezeTrainer(ArchTrainer, EnforceOverrides):
 for name, param in self.model.named_parameters():
     # TODO: Make the layer names to be updated a config value
-    # 'logits_op._op'
-    if 'fc' in name:
-        param.requires_grad = True
+    # 'fc' for resnet18
+    # 'logits_op._op' for darts search space
+    for identifier in self.conf_train['proxynas']['identifiers_to_unfreeze']:
+        if identifier in name:
+            param.requires_grad = True
 for name, param in self.model.named_parameters():
     if param.requires_grad:

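For readers skimming the diff: the second hunk above amounts to the loop sketched below. Only `identifiers_to_unfreeze`, `conf_train`, and the parameter-name matching visible in the diff come from the commit; the freeze-all step and the method structure are assumed from the surrounding comment ("freeze everything other than the last layer"), so treat this as a minimal sketch rather than the repository's actual implementation.

    def freeze_but_last_layer(self) -> None:
        # freeze every parameter first (assumed, per the comment preceding the call site)
        for param in self.model.parameters():
            param.requires_grad = False

        # re-enable gradients only for parameters whose name contains one of the
        # configured identifiers, e.g. ['fc', 'bn'] for resnet18 or
        # ['logits_op._op', 'bn'] for the darts search space
        identifiers = self.conf_train['proxynas']['identifiers_to_unfreeze']
        for name, param in self.model.named_parameters():
            if any(identifier in name for identifier in identifiers):
                param.requires_grad = True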
View file

@@ -26,12 +26,13 @@ nas:
     type: 'cosine'
     min_lr: 0.001 # min learning rate to be set in eta_min param of scheduler
   proxynas:
-    val_top1_acc_threshold: 0.05 # after some accuracy we will shift into training only the last layer
+    val_top1_acc_threshold: 0.80 # after some accuracy we will shift into training only the last layer
     freeze_epochs: 200
     freeze_lr: 0.001
     freeze_decay: 0.0
     freeze_momentum: 0.0
     train_regular: False
+    identifiers_to_unfreeze: ['fc', 'bn']
     aux_weight: 0.0 # disable auxiliary loss part during finetuning

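The main behavioural change in this config is raising `val_top1_acc_threshold` from 0.05 to 0.80, so regular training runs much longer before the trainer shifts to last-layer-only training. A rough sketch of that gating is below; only the config keys and attribute names shown in the diff are taken from the commit, the surrounding control flow is an assumption.

    # assumed flow: once validation top-1 crosses the threshold, switch to freeze
    # mode and fine-tune the unfrozen layers with the freeze_* hyperparameters
    conf_proxynas = self.conf_train['proxynas']
    if not self._in_freeze_mode and val_top1 >= conf_proxynas['val_top1_acc_threshold']:
        self.freeze_but_last_layer()                                    # unfreezes only 'fc'/'bn' here
        self.conf_optim['lr'] = conf_proxynas['freeze_lr']              # 0.001
        self.conf_optim['decay'] = conf_proxynas['freeze_decay']        # 0.0
        self.conf_optim['momentum'] = conf_proxynas['freeze_momentum']  # 0.0
        self._in_freeze_mode = True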
View file

@@ -12,6 +12,8 @@ nas:
   trainer:
     plotsdir: ''
     epochs: 600
+    aux_weight: 0.0
+    drop_path_prob: 0.0
     proxynas:
       val_top1_acc_threshold: 0.60 # after some accuracy we will shift into training only the last layer
       freeze_epochs: 200
@@ -19,4 +21,5 @@ nas:
       freeze_decay: 0.0
       freeze_momentum: 0.0
       train_regular: False
+      identifiers_to_unfreeze: ['logits_op._op', 'bn']
       aux_weight: 0.0 # disable auxiliary loss part during finetuning
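For context on `aux_weight: 0.0`: in DARTS-style models the auxiliary head contributes a weighted extra loss term, so setting the weight to zero disables it during finetuning. The snippet below is the standard formulation of that objective, not code from this repository.

    import torch.nn.functional as F

    def total_loss(logits, logits_aux, targets, aux_weight: float):
        # main classification loss plus weighted auxiliary-head loss;
        # with aux_weight = 0.0 the auxiliary term vanishes
        return F.cross_entropy(logits, targets) + aux_weight * F.cross_entropy(logits_aux, targets)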