From 1cf6843f5dba046a0ed4de5d056dcd7456ef78e2 Mon Sep 17 00:00:00 2001
From: Debadeepta Dey
Date: Fri, 16 Jul 2021 10:46:13 -0700
Subject: [PATCH] Started testing random search on Darts search space.

---
 .vscode/launch.json                            |   8 +
 .../random_dartsspace_reg_exp_runner.py        |  54 ++++++
 .../random_dartsspace_reg_searcher.py          |   4 +-
 .../random_natsbench_tss_reg_exp_runner.py     |   1 -
 confs/algos/darts.yaml                         |   2 +-
 confs/algos/random_dartsspace_reg.yaml         | 156 ++++++++++++++++++
 scripts/main.py                                |   3 +
 7 files changed, 224 insertions(+), 4 deletions(-)
 create mode 100644 archai/algos/random_darts/random_dartsspace_reg_exp_runner.py
 create mode 100644 confs/algos/random_dartsspace_reg.yaml

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 675ab11e..13dc235e 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -507,6 +507,14 @@
             "console": "integratedTerminal",
             "args": ["--full", "--algos", "random_natsbench_tss_reg", "--datasets", "cifar100", "--no-eval"]
         },
+        {
+            "name": "Random Darts Space Reg Search",
+            "type": "python",
+            "request": "launch",
+            "program": "${cwd}/scripts/main.py",
+            "console": "integratedTerminal",
+            "args": ["--full", "--algos", "random_dartsspace_reg", "--no-eval"]
+        },
         {
             "name": "Local Natsbench Tss Far Search",
             "type": "python",
diff --git a/archai/algos/random_darts/random_dartsspace_reg_exp_runner.py b/archai/algos/random_darts/random_dartsspace_reg_exp_runner.py
new file mode 100644
index 00000000..fe7bf9be
--- /dev/null
+++ b/archai/algos/random_darts/random_dartsspace_reg_exp_runner.py
@@ -0,0 +1,54 @@
+from overrides import overrides
+from typing import Optional, Type, Tuple
+
+from archai.nas.exp_runner import ExperimentRunner
+from archai.nas.model_desc_builder import ModelDescBuilder
+from archai.nas.arch_trainer import TArchTrainer
+from archai.common import common
+from archai.common import utils
+from archai.common.config import Config
+from archai.nas.evaluater import Evaluater, EvalResult
+from archai.nas.searcher import Searcher, SearchResult
+from archai.nas.finalizers import Finalizers
+from archai.nas.random_finalizers import RandomFinalizers
+from archai.nas.model_desc_builder import ModelDescBuilder
+from archai.algos.random.random_model_desc_builder import RandomModelDescBuilder
+from archai.algos.random_darts.random_dartsspace_reg_searcher import RandomDartsSpaceRegSearcher
+
+class RandomDartsSpaceRegExpRunner(ExperimentRunner):
+    ''' Runs random search using FastArchRank on DARTS search space '''
+
+    @overrides
+    def model_desc_builder(self)->Optional[ModelDescBuilder]:
+        return RandomModelDescBuilder()
+
+    @overrides
+    def trainer_class(self)->TArchTrainer:
+        return None # no search trainer
+
+    @overrides
+    def run_search(self, conf_search:Config)->SearchResult:
+        model_desc_builder = self.model_desc_builder()
+        trainer_class = self.trainer_class()
+        finalizers = self.finalizers()
+        search = self.searcher()
+        return search.search(conf_search, model_desc_builder, trainer_class, finalizers)
+
+    @overrides
+    def run_eval(self, conf_eval:Config)->EvalResult:
+        evaler = self.evaluater()
+        return evaler.evaluate(conf_eval)
+
+    @overrides
+    def searcher(self)->Searcher:
+        return RandomDartsSpaceRegSearcher()
+
+    @overrides
+    def evaluater(self)->Evaluater:
+        return None
+
+    @overrides
+    def copy_search_to_eval(self) -> None:
+        return None
+
+
diff --git a/archai/algos/random_darts/random_dartsspace_reg_searcher.py b/archai/algos/random_darts/random_dartsspace_reg_searcher.py
index bd412bae..35711264 100644
--- a/archai/algos/random_darts/random_dartsspace_reg_searcher.py
+++ b/archai/algos/random_darts/random_dartsspace_reg_searcher.py
@@ -15,6 +15,7 @@ from archai.nas.arch_trainer import TArchTrainer
 from archai.common.trainer import Trainer
 from archai.common import utils
 from archai.nas.finalizers import Finalizers
+from archai.nas.model import Model
 from archai.algos.random.random_model_desc_builder import RandomModelDescBuilder
 
 
@@ -48,8 +49,7 @@ class RandomDartsSpaceRegSearcher(Searcher):
 
             # as we are creating model based on seed
             model_desc = model_desc_builder.build(conf_model_desc, seed=seed_for_arch_generation)
-            model = self.model_from_desc(model_desc)
-
+            model = Model(model_desc, droppath=True, affine=True)
 
             checkpoint = None
 
diff --git a/archai/algos/random_natsbench/random_natsbench_tss_reg_exp_runner.py b/archai/algos/random_natsbench/random_natsbench_tss_reg_exp_runner.py
index 5ac5650e..596b92ba 100644
--- a/archai/algos/random_natsbench/random_natsbench_tss_reg_exp_runner.py
+++ b/archai/algos/random_natsbench/random_natsbench_tss_reg_exp_runner.py
@@ -26,7 +26,6 @@ class RandomNatsbenchTssRegExpRunner(ExperimentRunner):
     def trainer_class(self)->TArchTrainer:
         return None # no search trainer
 
-
     @overrides
     def run_search(self, conf_search:Config)->SearchResult:
         search = self.searcher()
diff --git a/confs/algos/darts.yaml b/confs/algos/darts.yaml
index 60c4349c..0108f90d 100644
--- a/confs/algos/darts.yaml
+++ b/confs/algos/darts.yaml
@@ -60,7 +60,7 @@ nas:
     resume: '_copy: /common/resume'
     model_desc:
       n_reductions: 2 # number of reductions to be applied
-      n_cells: 8 # number of cells
+      n_cells: 20 # number of cells
       dataset:
         _copy: '/dataset'
       max_final_edges: 2 # max edges that can be in final arch per node
diff --git a/confs/algos/random_dartsspace_reg.yaml b/confs/algos/random_dartsspace_reg.yaml
new file mode 100644
index 00000000..3a240e56
--- /dev/null
+++ b/confs/algos/random_dartsspace_reg.yaml
@@ -0,0 +1,156 @@
+__include__: "../datasets/cifar10.yaml" # default dataset settings are for cifar
+
+common:
+  experiment_name: 'throwaway' # you should supply this from the command line
+  experiment_desc: 'throwaway'
+  logdir: '~/logdir'
+  log_prefix: 'log' # prefix for log files that will be created (log.log and log.yaml), no log files if ''
+  log_level: 20 # logging.INFO
+  backup_existing_log_file: False # should we overwrite existing log file without making a copy?
+  yaml_log: True # if True, structured logs as yaml are also generated
+  seed: 2.0
+  tb_enable: False # if True then TensorBoard logging is enabled (may impact perf)
+  tb_dir: '$expdir/tb' # path where tensorboard logs would be stored
+  checkpoint:
+    filename: '$expdir/checkpoint.pth'
+    freq: 10
+
+  # redis address of Ray cluster. Use None for single node run,
+  # otherwise it should be something like host:6379. Make sure to run on head node:
+  # "ray start --head --redis-port=6379"
+  redis: null
+  apex: # this is overridden in search and eval individually
+    enabled: False # global switch to disable everything apex
+    distributed_enabled: True # enable/disable distributed mode
+    mixed_prec_enabled: True # switch to disable amp mixed precision
+    gpus: '' # use GPU IDs specified here (comma separated), if '' then use all GPUs
+    opt_level: 'O2' # optimization level for mixed precision
+    bn_fp32: True # keep BN in fp32
+    loss_scale: "dynamic" # loss scaling mode for mixed prec, must be a string representing a float or "dynamic"
+    sync_bn: False # should we replace BNs with sync BNs for distributed model
+    scale_lr: True # scale learning rate with world size in distributed mode
+    min_world_size: 0 # allows us to confirm we are indeed in distributed setting
+    detect_anomaly: False # if True, PyTorch code will run 6X slower
+    seed: '_copy: /common/seed'
+    ray:
+      enabled: False # initialize ray. Note: ray cannot be used if apex distributed is enabled
+      local_mode: False # if True then ray runs in serial mode
+
+  smoke_test: False
+  only_eval: False
+  resume: True
+
+dataset: {} # default dataset settings come from the __include__ at the top
+
+nas:
+  search:
+    max_num_models: 5
+    finalizer: 'default' # options are 'random' or 'default'
+    data_parallel: False
+    checkpoint:
+      _copy: '/common/checkpoint'
+    resume: '_copy: /common/resume'
+    search_iters: 1
+    full_desc_filename: '$expdir/full_model_desc.yaml' # arch before it was finalized
+    final_desc_filename: '$expdir/final_model_desc.yaml' # final arch is saved in this file
+    metrics_dir: '$expdir/models/{reductions}/{cells}/{nodes}/{search_iter}' # where metrics and model stats would be saved from each pareto iteration
+    model_desc:
+      n_reductions: 2 # number of reductions to be applied
+      n_cells: 8 # number of cells
+      num_edges_to_sample: 2 # number of incoming edges per node to be randomly sampled
+      dataset:
+        _copy: '/dataset'
+      max_final_edges: 2 # max edges that can be in final arch per node
+      model_post_op: 'pool_adaptive_avg2d'
+      params: {}
+      aux_weight: 0.4 # weight for loss from auxiliary towers in test time arch
+      aux_tower_stride: 3 # stride that aux tower should use, 3 is good for 32x32 images, 2 for imagenet
+      model_stems:
+        ops: ['stem_conv3x3', 'stem_conv3x3']
+        stem_multiplier: 3 # output channels multiplier for the stem
+        init_node_ch: 36 # num of input/output channels for nodes in 1st cell. NOTE: we match that in eval since this is discrete search.
+      cell:
+        n_nodes: 4 # number of nodes in a cell
+        cell_post_op: 'concate_channels'
+    loader:
+      apex:
+        _copy: '../../trainer/apex'
+      aug: '' # additional augmentations to use
+      cutout: 16 # cutout length, use cutout augmentation when > 0
+      load_train: True # load train split of dataset
+      train_batch: 96
+      train_workers: 4 # if null then gpu_count*4
+      test_workers: '_copy: ../train_workers' # if null then 4
+      load_test: True # load test split of dataset
+      test_batch: 1024
+      val_ratio: 0.0 # split portion for validation set, 0 to 1
+      val_fold: 0 # fold number to use (0 to 4)
+      cv_num: 5 # total number of folds available
+      dataset:
+        _copy: '/dataset'
+    trainer:
+      apex:
+        _copy: '/common/apex'
+      aux_weight: '_copy: /nas/search/model_desc/aux_weight'
+      drop_path_prob: 0.2 # probability that given edge will be dropped
+      grad_clip: 5.0 # grads above this value are clipped
+      logger_freq: 1000 # after every N updates dump loss and other metrics in logger
+      title: 'arch_train'
+      epochs: 1
+      batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
+      # additional vals for the derived class
+      plotsdir: '' # empty string means no plots, otherwise plots are generated for each epoch in this dir
+      l1_alphas: 0.0 # weight to be applied to the sum(abs(alphas)) loss term
+      lossfn:
+        type: 'CrossEntropyLoss'
+      optimizer:
+        type: 'sgd'
+        lr: 0.025 # init learning rate
+        decay: 3.0e-4
+        momentum: 0.9 # pytorch default is 0
+        nesterov: False
+        decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
+      lr_schedule:
+        type: 'cosine'
+        min_lr: 0.001 # min learning rate, this will be used in eta_min param of scheduler
+        warmup: null
+      validation:
+        title: 'search_val'
+        logger_freq: 0
+        batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
+        freq: 1 # perform validation only every N epochs
+        lossfn:
+          type: 'CrossEntropyLoss'
+    trainer_full:
+      apex:
+        _copy: '/common/apex'
+      aux_weight: '_copy: /nas/search/model_desc/aux_weight'
+      drop_path_prob: 0.2 # probability that given edge will be dropped
+      grad_clip: 5.0 # grads above this value are clipped
+      logger_freq: 1000 # after every N updates dump loss and other metrics in logger
+      title: 'arch_train'
+      epochs: 2
+      batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
+      # additional vals for the derived class
+      plotsdir: '' # empty string means no plots, otherwise plots are generated for each epoch in this dir
+      l1_alphas: 0.0 # weight to be applied to the sum(abs(alphas)) loss term
+      lossfn:
+        type: 'CrossEntropyLoss'
+      optimizer:
+        type: 'sgd'
+        lr: 0.025 # init learning rate
+        decay: 3.0e-4
+        momentum: 0.9 # pytorch default is 0
+        nesterov: False
+        decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
+      lr_schedule:
+        type: 'cosine'
+        min_lr: 0.001 # min learning rate, this will be used in eta_min param of scheduler
+        warmup: null
+      validation:
+        title: 'search_val'
+        logger_freq: 0
+        batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
+        freq: 1 # perform validation only every N epochs
+        lossfn:
+          type: 'CrossEntropyLoss'
\ No newline at end of file
diff --git a/scripts/main.py b/scripts/main.py
index 0023b61b..34082e5c 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -29,6 +29,7 @@ from archai.algos.zero_cost_measures.zero_cost_natsbench_epochs_experiment_runne
 from archai.algos.random_natsbench.random_natsbench_tss_far_exp_runner import RandomNatsbenchTssFarExpRunner
 from archai.algos.random_natsbench.random_natsbench_tss_far_post_exp_runner import RandomNatsbenchTssFarPostExpRunner
 from archai.algos.random_natsbench.random_natsbench_tss_reg_exp_runner import RandomNatsbenchTssRegExpRunner
+from archai.algos.random_darts.random_dartsspace_reg_exp_runner import RandomDartsSpaceRegExpRunner
 from archai.algos.local_search_natsbench.local_natsbench_tss_far_exp_runner import LocalNatsbenchTssFarExpRunner
 
 def main():
@@ -56,6 +57,7 @@ def main():
         'random_natsbench_tss_far': RandomNatsbenchTssFarExpRunner,
         'random_natsbench_tss_far_post': RandomNatsbenchTssFarPostExpRunner,
         'random_natsbench_tss_reg': RandomNatsbenchTssRegExpRunner,
+        'random_dartsspace_reg': RandomDartsSpaceRegExpRunner,
         'local_natsbench_tss_far': LocalNatsbenchTssFarExpRunner
     }
 
@@ -83,6 +85,7 @@ def main():
                             random_natsbench_tss_far,
                             random_natsbench_tss_far_post,
                             random_natsbench_tss_reg,
+                            random_dartsspace_reg,
                             local_natsbench_tss_far''',
                         help='NAS algos to run, separated by comma')
     parser.add_argument('--datasets', type=str, default='cifar10',