Nasbench101 is integrated, but more testing needs to be done.

This commit is contained in:
Debadeepta Dey 2021-01-17 21:28:16 -08:00 committed by Gustavo Rosa
Parent dd7ed42c8a
Commit 5ef9b5dfa6
2 changed files with 38 additions and 38 deletions

View file

@@ -45,44 +45,44 @@ class FreezeNasbench101ExperimentRunner(ExperimentRunner):
     def run_eval(self, conf_eval:Config)->EvalResult:
         # without training architecture evaluation score
         # ---------------------------------------
-        logger.pushd('naswotrain_evaluate')
-        naswotrain_evaler = NaswotrainNasbench101Evaluater()
-        conf_eval_naswotrain = deepcopy(conf_eval)
+        # logger.pushd('naswotrain_evaluate')
+        # naswotrain_evaler = NaswotrainNasbench101Evaluater()
+        # conf_eval_naswotrain = deepcopy(conf_eval)
-        if conf_eval_naswotrain['checkpoint'] is not None:
-            conf_eval_naswotrain['checkpoint']['filename'] = '$expdir/naswotrain_checkpoint.pth'
+        # if conf_eval_naswotrain['checkpoint'] is not None:
+        #     conf_eval_naswotrain['checkpoint']['filename'] = '$expdir/naswotrain_checkpoint.pth'
-        naswotrain_eval_result = naswotrain_evaler.evaluate(conf_eval_naswotrain, model_desc_builder=self.model_desc_builder())
-        logger.popd()
+        # naswotrain_eval_result = naswotrain_evaler.evaluate(conf_eval_naswotrain, model_desc_builder=self.model_desc_builder())
+        # logger.popd()
-        # regular evaluation of the architecture
-        # where we simply lookup the result
-        # --------------------------------------
-        logger.pushd('regular_evaluate')
-        arch_id = conf_eval['nasbench101']['arch_index']
-        dataroot = utils.full_path(conf_eval['loader']['dataset']['dataroot'])
-        # assuming that nasbench101 has been 'installed' in the dataroot folder
-        nasbench101_location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')
-        dataset_name = conf_eval['loader']['dataset']['name']
+        # # regular evaluation of the architecture
+        # # where we simply lookup the result
+        # # --------------------------------------
+        # logger.pushd('regular_evaluate')
+        # arch_id = conf_eval['nasbench101']['arch_index']
+        # dataroot = utils.full_path(conf_eval['loader']['dataset']['dataroot'])
+        # # assuming that nasbench101 has been 'installed' in the dataroot folder
+        # nasbench101_location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')
+        # dataset_name = conf_eval['loader']['dataset']['name']
-        # create the nasbench101 api
-        nsds = Nasbench101Dataset(nasbench101_location)
+        # # create the nasbench101 api
+        # nsds = Nasbench101Dataset(nasbench101_location)
-        # there are 423624 architectures total
-        if arch_id < 0 or arch_id > 423623:
-            logger.warn(f'architecture id {arch_id} is invalid ')
-            raise NotImplementedError()
+        # # there are 423624 architectures total
+        # if arch_id < 0 or arch_id > 423623:
+        #     logger.warn(f'architecture id {arch_id} is invalid ')
+        #     raise NotImplementedError()
-        if dataset_name != 'cifar10':
-            logger.warn(f'dataset {dataset_name} is not part of nasbench101')
-            raise NotImplementedError()
+        # if dataset_name != 'cifar10':
+        #     logger.warn(f'dataset {dataset_name} is not part of nasbench101')
+        #     raise NotImplementedError()
-        data = nsds[arch_id]
-        test_accuracy = data['avg_final_test_accuracy']
+        # data = nsds[arch_id]
+        # test_accuracy = data['avg_final_test_accuracy']
-        logger.info(f'Regular training top1 test accuracy is {test_accuracy}')
-        logger.info({'regtrainingtop1': float(test_accuracy)})
-        logger.popd()
+        # logger.info(f'Regular training top1 test accuracy is {test_accuracy}')
+        # logger.info({'regtrainingtop1': float(test_accuracy)})
+        # logger.popd()
         # freeze train evaluation of the architecture

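For reference, the regular-evaluation path that this commit comments out amounts to a table lookup: an architecture index is mapped through the pickled NAS-Bench-101 file to its precomputed statistics. A minimal sketch of that lookup is below, assuming the Nasbench101Dataset wrapper used in the diff is importable (the module path is a guess) and that the benchmark file has been placed under the dataroot as shown above.

# Minimal sketch of the commented-out lookup path; the import path is an assumption,
# while the class name, file location, and result keys come from the diff above.
import os

from archai.algos.nasbench101.nasbench101_dataset import Nasbench101Dataset  # assumed module path

dataroot = os.path.expanduser('~/dataroot')
nasbench101_location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')

# wraps the pickled NAS-Bench-101 records and supports indexing by architecture id
nsds = Nasbench101Dataset(nasbench101_location)

arch_id = 1234  # any index in [0, 423623]; there are 423624 architectures total
data = nsds[arch_id]  # dict of precomputed statistics for this architecture
print(f"avg final test accuracy: {data['avg_final_test_accuracy']}")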
View file

@@ -12,33 +12,33 @@ nas:
   model_desc:
     num_edges_to_sample: 2
   loader:
-    train_batch: 2048 # 2048 for V100 works! 1024 for 2080Ti for testing
-    aug: 'fa_reduced_cifar10' # in natsbench paper they use random flip and crop, we are doing lot more here
+    train_batch: 256 # 512 for V100? 256 for 2080Ti for local development
+    aug: '' # in natsbench paper they use random flip and crop, we are doing lot more here
     naswotrain:
       train_batch: 256 # batch size for computing trainingless score
     freeze_loader:
-      train_batch: 3072 # batch size for freeze training
+      train_batch: 256 # batch size for freeze training. 256 works with 5gb usage on 2080Ti.
   trainer:
     plotsdir: ''
-    val_top1_acc_threshold: 0.60 # after some accuracy we will shift into training only the last 'n' layers
+    val_top1_acc_threshold: 0.10 # after some accuracy we will shift into training only the last 'n' layers
     apex:
       _copy: '/common/apex'
     aux_weight: '_copy: /nas/eval/model_desc/aux_weight'
-    drop_path_prob: 0.2 # probability that given edge will be dropped
+    drop_path_prob: 0.0 # probability that given edge will be dropped
     grad_clip: 5.0 # grads above this value is clipped
     l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
     logger_freq: 1000 # after every N updates dump loss and other metrics in logger
     title: 'eval_train'
-    epochs: 200
+    epochs: 108
     batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
     lossfn:
       type: 'CrossEntropyLoss'
     optimizer:
       type: 'sgd'
       lr: 0.1 # init learning rate
-      decay: 5.0e-4 # pytorch default is 0.0
+      decay: 1.0e-4 # pytorch default is 0.0
       momentum: 0.9 # pytorch default is 0.0
-      nesterov: True # pytorch default is False
+      nesterov: False # pytorch default is False
       decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
     lr_schedule:
       type: 'cosine'
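The optimizer and schedule values after this change map onto standard PyTorch components; the switch to 108 epochs presumably mirrors the 108-epoch budget implied by the nasbench_only108 file. Below is a rough sketch of what the new settings correspond to in plain PyTorch; this is not archai's trainer code, and the model is a placeholder.

# Illustrative only: the updated optimizer / lr_schedule values expressed in plain PyTorch.
import torch
import torch.nn as nn

model = nn.Linear(10, 10)  # placeholder network standing in for the evaluated architecture

epochs = 108  # matches the new 'epochs' value in the config

optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.1,               # init learning rate
    momentum=0.9,
    weight_decay=1.0e-4,  # 'decay' in the config
    nesterov=False,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)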