Nasbench101 is integrated, but more testing needs to be done.

This commit is contained in:
Debadeepta Dey 2021-01-17 21:28:16 -08:00 committed by Gustavo Rosa
Parent dd7ed42c8a
Commit 5ef9b5dfa6
2 changed files with 38 additions and 38 deletions

View file

@@ -45,44 +45,44 @@ class FreezeNasbench101ExperimentRunner(ExperimentRunner):
     def run_eval(self, conf_eval:Config)->EvalResult:
         # without training architecture evaluation score
         # ---------------------------------------
-        logger.pushd('naswotrain_evaluate')
-        naswotrain_evaler = NaswotrainNasbench101Evaluater()
-        conf_eval_naswotrain = deepcopy(conf_eval)
+        # logger.pushd('naswotrain_evaluate')
+        # naswotrain_evaler = NaswotrainNasbench101Evaluater()
+        # conf_eval_naswotrain = deepcopy(conf_eval)
-        if conf_eval_naswotrain['checkpoint'] is not None:
-            conf_eval_naswotrain['checkpoint']['filename'] = '$expdir/naswotrain_checkpoint.pth'
+        # if conf_eval_naswotrain['checkpoint'] is not None:
+        #     conf_eval_naswotrain['checkpoint']['filename'] = '$expdir/naswotrain_checkpoint.pth'
-        naswotrain_eval_result = naswotrain_evaler.evaluate(conf_eval_naswotrain, model_desc_builder=self.model_desc_builder())
-        logger.popd()
+        # naswotrain_eval_result = naswotrain_evaler.evaluate(conf_eval_naswotrain, model_desc_builder=self.model_desc_builder())
+        # logger.popd()
-        # regular evaluation of the architecture
-        # where we simply lookup the result
-        # --------------------------------------
-        logger.pushd('regular_evaluate')
-        arch_id = conf_eval['nasbench101']['arch_index']
-        dataroot = utils.full_path(conf_eval['loader']['dataset']['dataroot'])
-        # assuming that nasbench101 has been 'installed' in the dataroot folder
-        nasbench101_location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')
-        dataset_name = conf_eval['loader']['dataset']['name']
+        # # regular evaluation of the architecture
+        # # where we simply lookup the result
+        # # --------------------------------------
+        # logger.pushd('regular_evaluate')
+        # arch_id = conf_eval['nasbench101']['arch_index']
+        # dataroot = utils.full_path(conf_eval['loader']['dataset']['dataroot'])
+        # # assuming that nasbench101 has been 'installed' in the dataroot folder
+        # nasbench101_location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')
+        # dataset_name = conf_eval['loader']['dataset']['name']
-        # create the nasbench101 api
-        nsds = Nasbench101Dataset(nasbench101_location)
+        # # create the nasbench101 api
+        # nsds = Nasbench101Dataset(nasbench101_location)
-        # there are 423624 architectures total
-        if arch_id < 0 or arch_id > 423623:
-            logger.warn(f'architecture id {arch_id} is invalid ')
-            raise NotImplementedError()
+        # # there are 423624 architectures total
+        # if arch_id < 0 or arch_id > 423623:
+        #     logger.warn(f'architecture id {arch_id} is invalid ')
+        #     raise NotImplementedError()
-        if dataset_name != 'cifar10':
-            logger.warn(f'dataset {dataset_name} is not part of nasbench101')
-            raise NotImplementedError()
+        # if dataset_name != 'cifar10':
+        #     logger.warn(f'dataset {dataset_name} is not part of nasbench101')
+        #     raise NotImplementedError()
-        data = nsds[arch_id]
-        test_accuracy = data['avg_final_test_accuracy']
+        # data = nsds[arch_id]
+        # test_accuracy = data['avg_final_test_accuracy']
-        logger.info(f'Regular training top1 test accuracy is {test_accuracy}')
-        logger.info({'regtrainingtop1': float(test_accuracy)})
-        logger.popd()
+        # logger.info(f'Regular training top1 test accuracy is {test_accuracy}')
+        # logger.info({'regtrainingtop1': float(test_accuracy)})
+        # logger.popd()
         # freeze train evaluation of the architecture

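For reference, the regular-evaluation path that this commit comments out amounts to a table lookup: an architecture index is mapped through the pickled NAS-Bench-101 file to its precomputed statistics. A minimal sketch of that lookup is below, assuming the Nasbench101Dataset wrapper used in the diff is importable (the module path is a guess) and that the benchmark file has been placed under the dataroot as shown above.

# Minimal sketch of the commented-out lookup path; the import path is an assumption,
# while the class name, file location, and result keys come from the diff above.
import os

from archai.algos.nasbench101.nasbench101_dataset import Nasbench101Dataset  # assumed module path

dataroot = os.path.expanduser('~/dataroot')
nasbench101_location = os.path.join(dataroot, 'nasbench_ds', 'nasbench_only108.tfrecord.pkl')

# wraps the pickled NAS-Bench-101 records and supports indexing by architecture id
nsds = Nasbench101Dataset(nasbench101_location)

arch_id = 1234  # any index in [0, 423623]; there are 423624 architectures total
data = nsds[arch_id]  # dict of precomputed statistics for this architecture
print(f"avg final test accuracy: {data['avg_final_test_accuracy']}")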
View file

@@ -12,33 +12,33 @@ nas:
   model_desc:
     num_edges_to_sample: 2
   loader:
-    train_batch: 2048 # 2048 for V100 works! 1024 for 2080Ti for testing
-    aug: 'fa_reduced_cifar10' # in natsbench paper they use random flip and crop, we are doing lot more here
+    train_batch: 256 # 512 for V100? 256 for 2080Ti for local development
+    aug: '' # in natsbench paper they use random flip and crop, we are doing lot more here
     naswotrain:
       train_batch: 256 # batch size for computing trainingless score
     freeze_loader:
-      train_batch: 3072 # batch size for freeze training
+      train_batch: 256 # batch size for freeze training. 256 works with 5gb usage on 2080Ti.
   trainer:
     plotsdir: ''
-    val_top1_acc_threshold: 0.60 # after some accuracy we will shift into training only the last 'n' layers
+    val_top1_acc_threshold: 0.10 # after some accuracy we will shift into training only the last 'n' layers
     apex:
       _copy: '/common/apex'
     aux_weight: '_copy: /nas/eval/model_desc/aux_weight'
-    drop_path_prob: 0.2 # probability that given edge will be dropped
+    drop_path_prob: 0.0 # probability that given edge will be dropped
     grad_clip: 5.0 # grads above this value is clipped
     l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
     logger_freq: 1000 # after every N updates dump loss and other metrics in logger
     title: 'eval_train'
-    epochs: 200
+    epochs: 108
     batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
     lossfn:
       type: 'CrossEntropyLoss'
     optimizer:
       type: 'sgd'
       lr: 0.1 # init learning rate
-      decay: 5.0e-4 # pytorch default is 0.0
+      decay: 1.0e-4 # pytorch default is 0.0
       momentum: 0.9 # pytorch default is 0.0
-      nesterov: True # pytorch default is False
+      nesterov: False # pytorch default is False
       decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
     lr_schedule:
       type: 'cosine'
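The optimizer and schedule values after this change map onto standard PyTorch components; the switch to 108 epochs presumably mirrors the 108-epoch budget implied by the nasbench_only108 file. Below is a rough sketch of what the new settings correspond to in plain PyTorch; this is not archai's trainer code, and the model is a placeholder.

# Illustrative only: the updated optimizer / lr_schedule values expressed in plain PyTorch.
import torch
import torch.nn as nn

model = nn.Linear(10, 10)  # placeholder network standing in for the evaluated architecture

epochs = 108  # matches the new 'epochs' value in the config

optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.1,               # init learning rate
    momentum=0.9,
    weight_decay=1.0e-4,  # 'decay' in the config
    nesterov=False,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)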