Mirror of https://github.com/microsoft/archai.git
Nasbench101 is integrated but more testing needs to be done.
This commit is contained in:
Parent: dd7ed42c8a
Commit: 5ef9b5dfa6
@@ -45,44 +45,44 @@ class FreezeNasbench101ExperimentRunner(ExperimentRunner):
     def run_eval(self, conf_eval:Config)->EvalResult:
         # without training architecture evaluation score
         # ---------------------------------------
-        logger.pushd('naswotrain_evaluate')
-        naswotrain_evaler = NaswotrainNasbench101Evaluater()
-        conf_eval_naswotrain = deepcopy(conf_eval)
+        # logger.pushd('naswotrain_evaluate')
+        # naswotrain_evaler = NaswotrainNasbench101Evaluater()
+        # conf_eval_naswotrain = deepcopy(conf_eval)

-        if conf_eval_naswotrain['checkpoint'] is not None:
-            conf_eval_naswotrain['checkpoint']['filename'] = '$expdir/naswotrain_checkpoint.pth'
+        # if conf_eval_naswotrain['checkpoint'] is not None:
+        #     conf_eval_naswotrain['checkpoint']['filename'] = '$expdir/naswotrain_checkpoint.pth'

-        naswotrain_eval_result = naswotrain_evaler.evaluate(conf_eval_naswotrain, model_desc_builder=self.model_desc_builder())
-        logger.popd()
+        # naswotrain_eval_result = naswotrain_evaler.evaluate(conf_eval_naswotrain, model_desc_builder=self.model_desc_builder())
+        # logger.popd()

-        # regular evaluation of the architecture
-        # where we simply lookup the result
-        # --------------------------------------
-        logger.pushd('regular_evaluate')
-        arch_id = conf_eval['nasbench101']['arch_index']
-        dataroot = utils.full_path(conf_eval['loader']['dataset']['dataroot'])
-        # assuming that nasbench101 has been 'installed' in the dataroot folder
-        nasbench101_location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')
-        dataset_name = conf_eval['loader']['dataset']['name']
+        # # regular evaluation of the architecture
+        # # where we simply lookup the result
+        # # --------------------------------------
+        # logger.pushd('regular_evaluate')
+        # arch_id = conf_eval['nasbench101']['arch_index']
+        # dataroot = utils.full_path(conf_eval['loader']['dataset']['dataroot'])
+        # # assuming that nasbench101 has been 'installed' in the dataroot folder
+        # nasbench101_location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')
+        # dataset_name = conf_eval['loader']['dataset']['name']

-        # create the nasbench101 api
-        nsds = Nasbench101Dataset(nasbench101_location)
+        # # create the nasbench101 api
+        # nsds = Nasbench101Dataset(nasbench101_location)

-        # there are 423624 architectures total
-        if arch_id < 0 or arch_id > 423623:
-            logger.warn(f'architecture id {arch_id} is invalid ')
-            raise NotImplementedError()
+        # # there are 423624 architectures total
+        # if arch_id < 0 or arch_id > 423623:
+        #     logger.warn(f'architecture id {arch_id} is invalid ')
+        #     raise NotImplementedError()

-        if dataset_name != 'cifar10':
-            logger.warn(f'dataset {dataset_name} is not part of nasbench101')
-            raise NotImplementedError()
+        # if dataset_name != 'cifar10':
+        #     logger.warn(f'dataset {dataset_name} is not part of nasbench101')
+        #     raise NotImplementedError()

-        data = nsds[arch_id]
-        test_accuracy = data['avg_final_test_accuracy']
+        # data = nsds[arch_id]
+        # test_accuracy = data['avg_final_test_accuracy']

-        logger.info(f'Regular training top1 test accuracy is {test_accuracy}')
-        logger.info({'regtrainingtop1': float(test_accuracy)})
-        logger.popd()
+        # logger.info(f'Regular training top1 test accuracy is {test_accuracy}')
+        # logger.info({'regtrainingtop1': float(test_accuracy)})
+        # logger.popd()


         # freeze train evaluation of the architecture
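
For reference, the benchmark-lookup code touched in the second half of this hunk reduces to: validate the architecture id and dataset, open the pickled NASBench-101 file, and read off the precomputed accuracy. Below is a minimal sketch of that pattern, assuming archai's Nasbench101Dataset behaves as the diff shows (indexable by architecture id, with entries exposing 'avg_final_test_accuracy'); the import path and helper name are illustrative, not confirmed by this commit:

    import os

    # import path is an assumption; the diff only shows the class name
    from archai.algos.nasbench101.nasbench101_dataset import Nasbench101Dataset

    def lookup_test_accuracy(dataroot: str, arch_id: int) -> float:
        # NASBench-101 covers 423624 architectures, trained only on cifar10,
        # so ids outside [0, 423623] have no entry in the benchmark
        if arch_id < 0 or arch_id > 423623:
            raise ValueError(f'architecture id {arch_id} is invalid')

        # the runner assumes the benchmark pickle was 'installed' under dataroot
        location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')
        nsds = Nasbench101Dataset(location)

        # each entry holds precomputed training statistics; the runner reads
        # the final test accuracy averaged over the benchmark's repeated runs
        data = nsds[arch_id]
        return float(data['avg_final_test_accuracy'])

The first half of the hunk drives the training-free 'naswotrain' score through NaswotrainNasbench101Evaluater on a copy of the same config, with the checkpoint filename redirected to '$expdir/naswotrain_checkpoint.pth' so it gets its own checkpoint file.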
@@ -12,33 +12,33 @@ nas:
     model_desc:
       num_edges_to_sample: 2
     loader:
-      train_batch: 2048 # 2048 for V100 works! 1024 for 2080Ti for testing
-      aug: 'fa_reduced_cifar10' # in natsbench paper they use random flip and crop, we are doing lot more here
+      train_batch: 256 # 512 for V100? 256 for 2080Ti for local development
+      aug: '' # in natsbench paper they use random flip and crop, we are doing lot more here
     naswotrain:
       train_batch: 256 # batch size for computing trainingless score
     freeze_loader:
-      train_batch: 3072 # batch size for freeze training
+      train_batch: 256 # batch size for freeze training. 256 works with 5gb usage on 2080Ti.
     trainer:
       plotsdir: ''
-      val_top1_acc_threshold: 0.60 # after some accuracy we will shift into training only the last 'n' layers
+      val_top1_acc_threshold: 0.10 # after some accuracy we will shift into training only the last 'n' layers
       apex:
         _copy: '/common/apex'
       aux_weight: '_copy: /nas/eval/model_desc/aux_weight'
-      drop_path_prob: 0.2 # probability that given edge will be dropped
+      drop_path_prob: 0.0 # probability that given edge will be dropped
       grad_clip: 5.0 # grads above this value is clipped
       l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
       logger_freq: 1000 # after every N updates dump loss and other metrics in logger
       title: 'eval_train'
-      epochs: 200
+      epochs: 108
       batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
       lossfn:
         type: 'CrossEntropyLoss'
       optimizer:
         type: 'sgd'
         lr: 0.1 # init learning rate
-        decay: 5.0e-4 # pytorch default is 0.0
+        decay: 1.0e-4 # pytorch default is 0.0
         momentum: 0.9 # pytorch default is 0.0
-        nesterov: True # pytorch default is False
+        nesterov: False # pytorch default is False
         decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
       lr_schedule:
         type: 'cosine'
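
Two knobs here are worth unpacking. epochs: 108 lines up with the nasbench_only108 results file used by the runner above: the benchmark's recorded metrics come from 108-epoch training runs. And batch_chunks describes plain gradient accumulation: the batch is split into chunks, gradients are summed across them, and the optimizer steps once per full batch, trading throughput for peak GPU memory. A minimal PyTorch sketch of that idea (illustrative names, not archai's actual trainer):

    import torch

    def train_step(model: torch.nn.Module, lossfn, optimizer, xb, yb,
                   batch_chunks: int = 1) -> None:
        # one optimizer step with the batch split into `batch_chunks` pieces;
        # a sketch of the idea behind the batch_chunks knob, not archai's code
        optimizer.zero_grad()
        for xc, yc in zip(xb.chunk(batch_chunks), yb.chunk(batch_chunks)):
            loss = lossfn(model(xc), yc)
            # scale so the accumulated gradient matches one full-batch backward
            (loss / batch_chunks).backward()
        optimizer.step()

In the same spirit, decay_bn: .NaN points at the usual PyTorch pattern of placing batch-norm parameters in a separate optimizer parameter group so they can take a different weight decay; NaN means they simply share the optimizer's decay.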