Mirror of https://github.com/microsoft/archai.git
Nominal implementation of darcyflow with currently missing target decoder in the training loop.
This commit is contained in:
Parent: e5d2c8afc6
Commit: 0f6d842165
@@ -222,6 +222,14 @@
             "console": "integratedTerminal",
             "args": ["--full", "--algos", "darts_space_constant_random_archs", "--datasets", "cifar100"]
         },
+        {
+            "name": "Darts Space Constant Random Archs Darcyflow",
+            "type": "python",
+            "request": "launch",
+            "program": "${cwd}/scripts/main.py",
+            "console": "integratedTerminal",
+            "args": ["--full", "--algos", "darts_space_constant_random_archs_darcyflow", "--datasets", "darcyflow"]
+        },
         {
             "name": "Proxynas-Darts-Space-Full",
             "type": "python",
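The new launch entry wires the darcyflow algo/dataset pair into the VS Code debugger; the same run can be started from the shell:

    python scripts/main.py --full --algos darts_space_constant_random_archs_darcyflow --datasets darcyflow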
@@ -25,7 +25,7 @@ from archai.common.checkpoint import CheckPoint
 from archai.common.ml_utils import set_optim_lr
 from archai.datasets import data
 
-TFreezeTrainer = Optional[Type['ConditionalTrainer']]
+TConditionalTrainer = Optional[Type['ConditionalTrainer']]
 
 
 class ConditionalTrainer(ArchTrainer, EnforceOverrides):
@@ -0,0 +1,54 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from archai.nas.evaluater import EvalResult
+from typing import Type
+from copy import deepcopy
+
+from overrides import overrides
+
+from archai.common.config import Config
+from archai.nas import nas_utils
+from archai.nas.exp_runner import ExperimentRunner
+from archai.nas.arch_trainer import ArchTrainer, TArchTrainer
+from archai.nas.evaluater import Evaluater, EvalResult
+
+from archai.common.common import get_expdir, logger
+
+from archai.algos.random_sample_darts_space.random_model_desc_builder import RandomModelDescBuilder
+from archai.algos.proxynas.freeze_manual_searcher import ManualFreezeSearcher
+from archai.algos.random_sample_darts_space.darts_space_darcyflow_evaluater import DartsSpaceDarcyflowEvaluater
+
+class DartsSpaceConstantRandomArchsDarcyflowExpRunner(ExperimentRunner):
+    '''Samples a reproducible random architecture from the DARTS search space
+    and trains it. Specific to the DarcyFlow dataset, which is a
+    2D-input, dense-2D-output task.'''
+
+    @overrides
+    def model_desc_builder(self)->RandomModelDescBuilder:
+        return RandomModelDescBuilder()
+
+    @overrides
+    def trainer_class(self)->TArchTrainer:
+        return None
+
+    @overrides
+    def searcher(self)->ManualFreezeSearcher:
+        return ManualFreezeSearcher() # no searcher, basically
+
+    @overrides
+    def copy_search_to_eval(self)->None:
+        pass
+
+    @overrides
+    def run_eval(self, conf_eval:Config)->EvalResult:
+        # regular evaluation of the architecture
+        # this is expensive
+        # --------------------------------------
+        logger.pushd('regular_evaluate')
+        evaler = DartsSpaceDarcyflowEvaluater()
+        conf_eval_reg = deepcopy(conf_eval)
+        reg_eval_result = evaler.evaluate(conf_eval_reg, model_desc_builder=self.model_desc_builder())
+        logger.popd()
+
+        return reg_eval_result
@@ -0,0 +1,57 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from copy import deepcopy
+from typing import Optional
+import importlib
+import sys
+import string
+import os
+
+from overrides import overrides
+
+import torch
+from torch import nn
+
+from overrides import overrides, EnforceOverrides
+
+from archai.common.trainer import Trainer
+from archai.common.config import Config
+from archai.common.common import logger
+from archai.datasets import data
+from archai.nas.model_desc import ModelDesc
+from archai.nas.model_desc_builder import ModelDescBuilder
+from archai.nas.nas_utils import create_nb301_genotype_from_desc
+from archai.nas import nas_utils
+from archai.common import ml_utils, utils
+from archai.common.metrics import EpochMetrics, Metrics
+from archai.nas.model import Model
+from archai.common.checkpoint import CheckPoint
+from archai.nas.evaluater import Evaluater
+from archai.algos.proxynas.freeze_trainer import FreezeTrainer
+from archai.algos.proxynas.conditional_trainer import ConditionalTrainer
+from archai.algos.random_sample_darts_space.constant_darts_space_sampler import ConstantDartsSpaceSampler
+from archai.algos.random_sample_darts_space.random_model_desc_builder import RandomModelDescBuilder
+from archai.algos.random_sample_darts_space.darts_space_evaluater import DartsSpaceEvaluater
+from archai.nas.darcyflow_trainer import DarcyflowTrainer
+
+class DartsSpaceDarcyflowEvaluater(DartsSpaceEvaluater):
+
+    @overrides
+    def train_model(self, conf_train:Config, model:nn.Module,
+                    checkpoint:Optional[CheckPoint])->Metrics:
+        conf_loader = conf_train['loader']
+        conf_train = conf_train['trainer']
+
+        # only darcyflow works with this evaluater
+        if conf_loader['dataset']['name'] != 'darcyflow':
+            raise TypeError('DartsSpaceDarcyflowEvaluater requires the darcyflow dataset')
+
+        # get data
+        data_loaders = self.get_data(conf_loader)
+
+        # the trainer class is the only difference
+        trainer = DarcyflowTrainer(conf_train, model, checkpoint)
+        train_metrics = trainer.fit(data_loaders)
+        return train_metrics
@@ -102,3 +102,52 @@ class LabelSmoothing(nn.Module):
         smooth_loss = -logprobs.mean(dim=-1)
         loss = self.confidence * nll_loss + self.smoothing * smooth_loss
         return loss.mean()
+
+
+# Credits: https://github.com/rtu715/NAS-Bench-360/blob/d075006848c664371855c34082b0a00cda62be67/darts/gaea-dense/utils.py#L126
+class LpLoss(object):
+    ''' loss function with rel/abs Lp loss '''
+    def __init__(self, d=2, p=2, size_average=True, reduction=True):
+        super(LpLoss, self).__init__()
+
+        # Dimension and Lp-norm type are positive
+        assert d > 0 and p > 0
+
+        self.d = d
+        self.p = p
+        self.reduction = reduction
+        self.size_average = size_average
+
+    def abs(self, x, y):
+        num_examples = x.size()[0]
+
+        # Assume uniform mesh
+        h = 1.0 / (x.size()[1] - 1.0)
+
+        all_norms = (h**(self.d/self.p))*torch.norm(x.view(num_examples,-1) - y.view(num_examples,-1), self.p, 1)
+
+        if self.reduction:
+            if self.size_average:
+                return torch.mean(all_norms)
+            else:
+                return torch.sum(all_norms)
+
+        return all_norms
+
+    def rel(self, x, y):
+        num_examples = x.size()[0]
+
+        diff_norms = torch.norm(x.reshape(num_examples,-1) - y.reshape(num_examples,-1), self.p, 1)
+        y_norms = torch.norm(y.reshape(num_examples,-1), self.p, 1)
+
+        if self.reduction:
+            if self.size_average:
+                return torch.mean(diff_norms/y_norms)
+            else:
+                return torch.sum(diff_norms/y_norms)
+
+        return diff_norms/y_norms
+
+    def __call__(self, x, y):
+        return self.rel(x, y)
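For orientation, a minimal usage sketch of the LpLoss added above (tensor shapes are illustrative; 85x85 matches the darcyflow grid configured later in this commit):

    import torch

    lossfn = LpLoss(d=2, p=2, size_average=True)  # __call__ dispatches to rel(), so this is a relative L2 loss
    pred = torch.rand(8, 85, 85)    # batch of predicted 2D solution fields
    target = torch.rand(8, 85, 85)  # ground-truth fields
    loss = lossfn(pred, target)     # mean over the batch of ||pred - target||_2 / ||target||_2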
@@ -167,6 +167,7 @@ class DarcyflowProvider(DatasetProvider):
     def __init__(self, conf_dataset:Config):
         super().__init__(conf_dataset)
         self._dataroot = utils.full_path(conf_dataset['dataroot'])
+        self._sub = conf_dataset['sub']
 
     @overrides
     def get_datasets(self, load_train:bool, load_test:bool,
@@ -176,7 +177,7 @@ class DarcyflowProvider(DatasetProvider):
         path_to_data = os.path.join(self._dataroot, 'darcyflow')
 
         # load the dataset but without any validation split
-        trainset, testset = load_darcyflow(path_to_data)
+        trainset, testset = load_darcyflow(path_to_data, self._sub)
 
         return trainset, testset
@@ -0,0 +1,102 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from typing import Callable, Tuple, Optional, Type
+
+import torch
+from torch import nn, Tensor
+from torch.optim.optimizer import Optimizer
+from torch.optim.lr_scheduler import _LRScheduler
+from torch.utils.data import DataLoader
+
+from overrides import EnforceOverrides
+
+from archai.common.metrics import Metrics
+from archai.common.tester import Tester
+from archai.common.config import Config
+from archai.common import utils, ml_utils
+from archai.common.common import logger
+from archai.datasets import data
+from archai.common.checkpoint import CheckPoint
+from archai.common.apex_utils import ApexUtils
+from archai.common.multi_optim import MultiOptim, OptimSched
+from archai.nas.nas_utils import get_model_stats
+from archai.nas.arch_trainer import ArchTrainer
+from archai.datasets.providers.darcyflow_provider import UnitGaussianNormalizer
+
+TDarcyflowTrainer = Optional[Type['DarcyflowTrainer']]
+
+
+class DarcyflowTrainer(ArchTrainer, EnforceOverrides):
+    def __init__(self, conf_train: Config, model: nn.Module,
+                 checkpoint:Optional[CheckPoint]) -> None:
+        super().__init__(conf_train, model, checkpoint)
+
+        # region config vars specific to Darcyflow trainer
+
+        # endregion
+
+    def _train_epoch(self, train_dl: DataLoader)->None:
+        steps = len(train_dl)
+        self.model.train()
+
+        logger.pushd('steps')
+        for step, (x, y) in enumerate(train_dl):
+            logger.pushd(step)
+            assert self.model.training # derived class might alter the mode
+
+            # TODO: please check that no algorithm is invalidated by swapping prestep with zero grad
+            self._multi_optim.zero_grad()
+
+            self.pre_step(x, y)
+
+            # divide batch in to chunks if needed so it fits in GPU RAM
+            if self.batch_chunks > 1:
+                x_chunks, y_chunks = torch.chunk(x, self.batch_chunks), torch.chunk(y, self.batch_chunks)
+            else:
+                x_chunks, y_chunks = (x,), (y,)
+
+            logits_chunks = []
+            loss_sum, loss_count = 0.0, 0
+            for xc, yc in zip(x_chunks, y_chunks):
+                xc, yc = xc.to(self.get_device(), non_blocking=True), yc.to(self.get_device(), non_blocking=True)
+
+                logits_c, aux_logits = self.model(xc), None
+                tupled_out = isinstance(logits_c, Tuple) and len(logits_c) >= 2
+                if tupled_out: # then we are using model created by desc
+                    logits_c, aux_logits = logits_c[0], logits_c[1]
+
+                # darcyflow specific line: collapse the singleton channel dim
+                logits_c = logits_c.squeeze()
+                # WARNING, DEBUG: Making code run through for now
+                # this is missing all the y's decoding
+
+                loss_c = self.compute_loss(self._lossfn, yc, logits_c,
+                                           self._aux_weight, aux_logits)
+
+                self._apex.backward(loss_c, self._multi_optim)
+
+                loss_sum += loss_c.item() * len(logits_c)
+                loss_count += len(logits_c)
+                logits_chunks.append(logits_c.detach().cpu())
+
+            # TODO: original darts clips alphas as well but pt.darts doesn't
+            self._apex.clip_grad(self._grad_clip, self.model, self._multi_optim)
+
+            self._multi_optim.step()
+
+            # TODO: we possibly need to sync so all replicas are up to date
+            self._apex.sync_devices()
+
+            self.post_step(x, y,
+                           ml_utils.join_chunks(logits_chunks),
+                           torch.tensor(loss_sum/loss_count),
+                           steps)
+            logger.popd()
+
+            # end of step
+
+        self._multi_optim.epoch()
+        logger.popd()
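The unused UnitGaussianNormalizer import above points at the decoding step the commit message flags as missing. In the NAS-Bench-360/FNO reference code credited in ml_utils, targets are encoded with a normalizer fitted on the training targets, and both predictions and targets are decoded before the Lp loss. A hedged sketch of what the WARNING block in the chunk loop would become (the y_normalizer attribute is illustrative; this commit does not define it):

    # assumed: self._y_normalizer is a UnitGaussianNormalizer fitted on the training targets
    logits_c = self._y_normalizer.decode(logits_c)  # map predictions back to the original target scale
    yc = self._y_normalizer.decode(yc)              # the reference code decodes targets too before the loss
    loss_c = self.compute_loss(self._lossfn, yc, logits_c,
                               self._aux_weight, aux_logits)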
@@ -0,0 +1,23 @@
+__include__: 'darts.yaml' # just use darts defaults
+
+nas:
+  search:
+    model_desc:
+      num_edges_to_sample: 2 # number of edges each node will take input from
+
+  eval:
+    dartsspace:
+      arch_index: 66
+    model_desc:
+      aux_weight: False # AuxTower class assumes specific input size hence breaks with many datasets.
+      num_edges_to_sample: 2
+      n_cells: 8
+    loader:
+      aug: ''
+      cutout: -1 # cutout length, use cutout augmentation when > 0
+      val_ratio: 0.0
+      train_batch: 96
+    trainer:
+      use_val: False
+      plotsdir: ''
+      epochs: 100
@@ -0,0 +1,9 @@
+__include__: './dataroot.yaml' # default dataset settings are for cifar
+
+dataset:
+  name: 'darcyflow'
+  n_classes: 1 # not a classification task but a convenient hook. Imagine you have a 3x85x85 input and you want 1x85x85. (Do we need this?)
+  channels: 3 # number of channels in image
+  max_batches: -1 # if >= 0 then only these many batches are generated (useful for debugging)
+  sub: 5 # parameter for creating grid used to process data
+  storage_name: 'darcyflow' # name of folder or tar file to copy from cloud storage
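A note on sub: in the NAS-Bench-360 pipeline credited in ml_utils, the raw Darcy flow solutions are 421x421 grids and sub acts as a subsampling stride, which is consistent with the 3x85x85 shapes mentioned in the n_classes comment (421 subsampled with stride 5 yields 85). A sketch of the assumed behavior (load_darcyflow internals are not part of this diff):

    r = 5                               # the 'sub' config value
    s = int(((421 - 1) / r) + 1)        # 85: grid resolution per side after subsampling
    x = x_raw[:, ::r, ::r][:, :s, :s]   # keep every r-th mesh point of each 421x421 field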
@@ -15,6 +15,7 @@ from archai.algos.gumbelsoftmax.gs_exp_runner import GsExperimentRunner
 from archai.algos.divnas.divnas_exp_runner import DivnasExperimentRunner
 from archai.algos.didarts.didarts_exp_runner import DiDartsExperimentRunner
 from archai.algos.random_sample_darts_space.darts_space_constant_random_archs_exp_runner import DartsSpaceConstantRandomArchsExperimentRunner
+from archai.algos.random_sample_darts_space.darts_space_constant_random_archs_darcyflow_exp_runner import DartsSpaceConstantRandomArchsDarcyflowExpRunner
 from archai.algos.proxynas.freeze_darts_space_experiment_runner import FreezeDartsSpaceExperimentRunner
 from archai.algos.proxynas.freeze_natsbench_experiment_runner import FreezeNatsbenchExperimentRunner
 from archai.algos.proxynas.freeze_natsbench_sss_experiment_runner import FreezeNatsbenchSSSExperimentRunner
@@ -52,6 +53,7 @@ def main():
         'divnas': DivnasExperimentRunner,
         'didarts': DiDartsExperimentRunner,
         'darts_space_constant_random_archs': DartsSpaceConstantRandomArchsExperimentRunner,
+        'darts_space_constant_random_archs_darcyflow': DartsSpaceConstantRandomArchsDarcyflowExpRunner,
         'proxynas_darts_space': FreezeDartsSpaceExperimentRunner,
         'proxynas_natsbench_space': FreezeNatsbenchExperimentRunner,
         'proxynas_natsbench_sss_space': FreezeNatsbenchSSSExperimentRunner,