Mirror of https://github.com/microsoft/archai.git

All-No_pareto functional again

This commit is contained in:
Parent: fd1fb0ab44
Commit: 096b564983

@@ -214,7 +214,7 @@ class ApexUtils:
        else:
            return val

    def _get_optim(self, multi_optim:MultiOptim)->Optimizer:
    def _get_one_optim(self, multi_optim:MultiOptim)->Optimizer:
        assert len(multi_optim)==1, \
            'Mixed precision is only supported for one optimizer' \
            f' but {len(multi_optim)} optimizers were supplied'
@@ -234,7 +234,10 @@ class ApexUtils:

    def step(self, multi_optim:MultiOptim)->None:
        if self.is_mixed():
            self._scaler.step(self._get_optim(multi_optim)) # pyright: ignore[reportOptionalMemberAccess]
            # self._scaler.unscale_ will be called automatically if it isn't called yet from grad clipping
            # https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler.step
            for optim_shed in multi_optim:
                self._scaler.step(optim_shed.optim) # pyright: ignore[reportOptionalMemberAccess]
            self._scaler.update() # pyright: ignore[reportOptionalMemberAccess]
        else:
            multi_optim.step()
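For context on the step() change above: with torch.cuda.amp, every optimizer is stepped through the shared GradScaler and the scale factor is updated once per iteration. A minimal sketch of that pattern in plain PyTorch, with a hypothetical model and optimizer that are not part of the commit:

import torch
from torch.cuda.amp import GradScaler, autocast

model = torch.nn.Linear(10, 2).cuda()
optim = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = GradScaler()

x = torch.randn(8, 10).cuda()
y = torch.randint(0, 2, (8,)).cuda()

optim.zero_grad()
with autocast():
    loss = torch.nn.functional.cross_entropy(model(x), y)
scaler.scale(loss).backward()   # scale the loss so fp16 gradients don't underflow
scaler.step(optim)              # unscales gradients (if not done yet) before stepping
scaler.update()                 # adjust the scale factor once per iteration

In the diff, the same three calls are applied per optimizer held by MultiOptim, with a single update() at the end.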
@@ -249,12 +252,13 @@ class ApexUtils:
        model = model.to(self.device)

        # scale LR
        optim = self._get_optim(multi_optim)
        if self.is_dist() and self._scale_lr:
            lr = ml_utils.get_optim_lr(optim)
            scaled_lr = lr * self.world_size / float(batch_size)
            ml_utils.set_optim_lr(optim, scaled_lr)
            self._log_info({'lr_scaled': True, 'old_lr': lr, 'new_lr': scaled_lr})
            for optim_shed in multi_optim:
                optim = optim_shed.optim
                lr = ml_utils.get_optim_lr(optim)
                scaled_lr = lr * self.world_size / float(batch_size)
                ml_utils.set_optim_lr(optim, scaled_lr)
                self._log_info({'lr_scaled': True, 'old_lr': lr, 'new_lr': scaled_lr})

        if self.is_dist():
            model = DistributedDataParallel(model, device_ids=[self._gpu], output_device=self._gpu)
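The scaling above multiplies each base learning rate by the number of distributed workers and divides by the per-worker batch size, and with this change it is applied to every optimizer in MultiOptim rather than only the first. A tiny numeric sketch with made-up values (they do not come from the Archai configs):

# Illustrative numbers only, to show the formula used in the hunk above.
base_lr = 0.025
world_size = 4            # number of distributed workers
batch_size = 96           # per-worker batch size passed in

scaled_lr = base_lr * world_size / float(batch_size)
print(scaled_lr)          # 0.025 * 4 / 96 ≈ 0.00104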
@@ -264,8 +268,8 @@
    def clip_grad(self, clip:float, model:nn.Module, multi_optim:MultiOptim)->None:
        if clip > 0.0:
            if self.is_mixed():
                optim = self._get_optim(multi_optim)
                self._scaler.unscale_(optim) # pyright: ignore[reportOptionalMemberAccess]
                # https://pytorch.org/docs/stable/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers
                self._scaler.unscale_(multi_optim[0].optim) # pyright: ignore[reportOptionalMemberAccess]
                nn.utils.clip_grad_norm_(model.parameters(), clip)
            else:
                nn.utils.clip_grad_norm_(model.parameters(), clip)

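Per the AMP note linked in the hunk above, gradients must be unscaled before clipping so the threshold applies to their true magnitudes; a later GradScaler.step() then skips the redundant unscale. A minimal self-contained sketch of that ordering in plain PyTorch (hypothetical names, not Archai code):

import torch
from torch.cuda.amp import GradScaler, autocast

model = torch.nn.Linear(10, 1).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = GradScaler()

with autocast():
    loss = model(torch.randn(4, 10).cuda()).sum()
scaler.scale(loss).backward()
scaler.unscale_(optimizer)                                        # unscale before clipping
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip real-valued gradients
scaler.step(optimizer)                                            # step() will not unscale again
scaler.update()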
@@ -20,8 +20,8 @@ from archai.supergraph.nas.model import Model
from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.supergraph.datasets import data
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.supergraph.algos.darts.bilevel_optimizer import BilevelOptimizer

class BilevelArchTrainer(ArchTrainer):

@@ -12,8 +12,8 @@ from torch.optim.optimizer import Optimizer
from archai.common.config import Config
from archai.common import utils
from archai.supergraph.nas.model import Model
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.common.utils import zip_eq
from archai.supergraph.utils import ml_utils

@@ -12,8 +12,8 @@ from torch.optim.optimizer import Optimizer
from archai.common.config import Config
from archai.common import utils
from archai.supergraph.nas.model import Model
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.common.utils import zip_eq
from archai.supergraph.utils import ml_utils

@@ -19,8 +19,8 @@ from archai.common import utils
from archai.supergraph.nas.model import Model
from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.supergraph.utils.multi_optim import MultiOptim, OptimSched

class DidartsArchTrainer(ArchTrainer):

@@ -10,8 +10,8 @@ from torch import nn
import numpy as np

from archai.common.common import get_conf
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.supergraph.datasets.data import get_data
from archai.supergraph.nas.model import Model
from archai.supergraph.nas.cell import Cell

@@ -14,8 +14,8 @@ import os

from archai.common.common import get_conf
from archai.common.common import get_expdir
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.supergraph.datasets.data import get_data
from archai.supergraph.nas.model import Model
from archai.supergraph.nas.cell import Cell

@@ -20,8 +20,8 @@ from archai.common import utils
from archai.supergraph.nas.model import Model
from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.common.common import get_conf
from archai.supergraph.algos.gumbelsoftmax.gs_op import GsOp

@@ -11,8 +11,8 @@ import os

from archai.common.common import get_conf
from archai.common.common import get_expdir
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.supergraph.datasets.data import get_data
from archai.supergraph.nas.model import Model
from archai.supergraph.nas.cell import Cell

@@ -17,8 +17,8 @@ from archai.supergraph.utils import ml_utils

from archai.supergraph.utils.trainer import Trainer
from archai.common.config import Config
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.supergraph.datasets import data
from archai.supergraph.nas.model_desc import ModelDesc
from archai.supergraph.nas.model_desc_builder import ModelDescBuilder

@@ -11,8 +11,8 @@ from overrides import overrides

from torch.utils.data.dataloader import DataLoader

from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger

from archai.common.config import Config
from archai.supergraph.nas.model_desc_builder import ModelDescBuilder

@@ -77,12 +77,9 @@ class PetridishOp(Op):
        'avg_pool_3x3',
        'skip_connect', # identity
        'sep_conv_3x3',
        #'sep_conv_5x5',
        'sep_conv_5x5',
        'dil_conv_3x3',
        #'dil_conv_5x5',
        'mbconv_r3',
        'mbconv_r2',
        'mbconv_r1',
        'dil_conv_5x5',
        'none' # this must be at the end so top1 doesn't chose it
    ]

@@ -204,3 +201,4 @@ class PetridishOp(Op):
        # we store alphas in list so Pytorch don't register them
        self._alphas = list(self.arch_params().paramlist_by_kind('alphas'))
        assert len(self._alphas)==1

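The 'none' primitive above is kept at the end of the list because, when the strongest ops are later picked from the learned alphas, the zero op must not be selectable; placing it last makes it trivial to exclude from the top-1. A hedged sketch of that selection idea with generic DARTS-style placeholders, not PetridishOp's actual code:

import torch

PRIMITIVES = ['max_pool_3x3', 'avg_pool_3x3', 'skip_connect',
              'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3',
              'dil_conv_5x5', 'none']           # 'none' deliberately last

alphas = torch.randn(len(PRIMITIVES))           # one architecture weight per candidate op
best = int(torch.argmax(alphas[:-1]))           # top-1 over everything except 'none'
print(PRIMITIVES[best])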
@@ -18,8 +18,8 @@ import matplotlib.pyplot as plt
from archai.supergraph.nas.model_desc import ConvMacroParams, CellDesc, CellType, OpDesc, \
    EdgeDesc, TensorShape, TensorShapes, NodeDesc, ModelDesc
from archai.supergraph.utils.metrics import Metrics
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.common import utils

class JobStage(Enum):

@@ -25,8 +25,8 @@ from torch.utils.data.dataloader import DataLoader
import yaml

from archai.common import common
from archai.common.logger import Logger
logger = Logger(source=__name__),

from archai.common.common import logger
from archai.common.common import CommonState
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.common.config import Config

@@ -21,8 +21,8 @@ from archai.supergraph.nas.model import Model
from archai.supergraph.nas.model_desc import CellType
from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.common.logger import Logger
logger = Logger(source=__name__)

from archai.common.common import logger
from archai.supergraph.datasets import data
from archai.common.common import get_conf
from archai.supergraph.algos.xnas.xnas_op import XnasOp

@@ -11,9 +11,9 @@ from torchvision.transforms import transforms
from torch.utils.data.dataset import Dataset
from torchvision.datasets.utils import check_integrity, download_url
from archai.common.utils import download_and_extract_tar, extract_tar
from archai.common.logger import Logger

logger = Logger(source=__name__)

from archai.common.common import logger

@@ -4,19 +4,18 @@ from torch import nn
from torch.nn import DataParallel
# from torchvision import models

from archai.supergraph.nas.models.resnet import ResNet
from archai.supergraph.nas.models.pyramidnet import PyramidNet
from archai.supergraph.nas.models.shakeshake.shake_resnet import ShakeResNet
from archai.supergraph.nas.models.wideresnet import WideResNet
from archai.supergraph.nas.models.shakeshake.shake_resnext import ShakeResNeXt
from .pyramidnet import PyramidNet
from .shakeshake.shake_resnet import ShakeResNet
from .wideresnet import WideResNet
from .shakeshake.shake_resnext import ShakeResNeXt

from archai.supergraph.nas.models.mobilenetv2 import *
from archai.supergraph.nas.models.resnet_cifar10 import *
from archai.supergraph.nas.models.vgg import *
from archai.supergraph.nas.models.densenet import *
from archai.supergraph.nas.models.resnet_orig import *
from archai.supergraph.nas.models.googlenet import *
from archai.supergraph.nas.models.inception import *
from .mobilenetv2 import *
from .resnet import *
from .vgg import *
from .densenet import *
from .resnet_orig import *
from .googlenet import *
from .inception import *


def get_model(conf, num_class=10):

@@ -2,7 +2,7 @@ import torch
import torch.nn as nn
import math

from archai.supergraph.nas.models.shakedrop import ShakeDrop
from .shakedrop import ShakeDrop


def conv3x3(in_planes, out_planes, stride=1):

@@ -1,31 +1,44 @@
# Original code: https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

import torch
import torch.nn as nn
import math
import os

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d']

def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)

@@ -35,9 +48,9 @@ class BasicBlock(nn.Module):
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)
            identity = self.downsample(x)

        out += residual
        out += identity
        out = self.relu(out)

        return out

@@ -46,22 +59,25 @@ class BasicBlock(nn.Module):
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * Bottleneck.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * Bottleneck.expansion)
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)

@@ -73,108 +89,199 @@ class Bottleneck(nn.Module):

        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self, dataset, depth, n_classes, bottleneck=False):

    def __init__(self, block, layers, num_classes=10, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        self.dataset = dataset
        if self.dataset.startswith('cifar'):
            self.inplanes = 16
            #logger.info(bottleneck)
            if bottleneck == True:
                n = int((depth - 2) / 9)
                block = Bottleneck
            else:
                n = int((depth - 2) / 6)
                block = BasicBlock
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

            self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(self.inplanes)
            self.relu = nn.ReLU(inplace=True)
            self.layer1 = self._make_layer(block, 16, n)
            self.layer2 = self._make_layer(block, 32, n, stride=2)
            self.layer3 = self._make_layer(block, 64, n, stride=2)
            # self.avgpool = nn.AvgPool2d(8)
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(64 * block.expansion, n_classes)
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        elif dataset == 'imagenet':
            blocks ={18: BasicBlock, 34: BasicBlock, 50: Bottleneck, 101: Bottleneck, 152: Bottleneck, 200: Bottleneck}
            layers ={18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3], 200: [3, 24, 36, 3]}
            assert layers[depth], 'invalid detph for ResNet (depth should be one of 18, 34, 50, 101, 152, and 200)'
        ## CIFAR10: kernel_size 7 -> 3, stride 2 -> 1, padding 3->1
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        ## END

            self.inplanes = 64
            self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
            self.bn1 = nn.BatchNorm2d(64)
            self.relu = nn.ReLU(inplace=True)
            self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
            self.layer1 = self._make_layer(blocks[depth], 64, layers[depth][0])
            self.layer2 = self._make_layer(blocks[depth], 128, layers[depth][1], stride=2)
            self.layer3 = self._make_layer(blocks[depth], 256, layers[depth][2], stride=2)
            self.layer4 = self._make_layer(blocks[depth], 512, layers[depth][3], stride=2)
            # self.avgpool = nn.AvgPool2d(7)
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512 * blocks[depth].expansion, n_classes)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        if self.dataset == 'cifar10' or self.dataset == 'cifar100':
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

            x = self.avgpool(x)
            x = x.view(x.size(0), -1)
            x = self.fc(x)

        elif self.dataset == 'imagenet':
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.maxpool(x)

            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)

            x = self.avgpool(x)
            x = x.view(x.size(0), -1)
            x = self.fc(x)
        x = self.avgpool(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)

        return x


def _resnet(arch, block, layers, pretrained, progress, device, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        script_dir = os.path.dirname(__file__)
        state_dict = torch.load(script_dir + '/state_dicts/'+arch+'.pt', map_location=device)
        model.load_state_dict(state_dict)
    return model


def resnet18(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, device,
                   **kwargs)


def resnet34(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, device,
                   **kwargs)


def resnet50(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, device,
                   **kwargs)


def resnet101(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, device,
                   **kwargs)


def resnet152(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, device,
                   **kwargs)


def resnext50_32x4d(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNeXt-50 32x4d model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, device, **kwargs)


def resnext101_32x8d(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNeXt-101 32x8d model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, device, **kwargs)

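For orientation, the torchvision-style constructors in the new resnet module are plain factory functions; a small usage sketch follows, under the assumption that the module ends up importable as archai.supergraph.nas.models.resnet (as the `from .resnet import *` hunk earlier suggests):

import torch

# Assumed import path; adjust to wherever the new resnet module actually lives.
from archai.supergraph.nas.models.resnet import resnet18

model = resnet18(pretrained=False, num_classes=10)   # CIFAR-friendly 3x3 stem per the diff
x = torch.randn(2, 3, 32, 32)                        # CIFAR-sized input
logits = model(x)
print(logits.shape)                                  # torch.Size([2, 10])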
@@ -1,287 +0,0 @@
import torch
import torch.nn as nn
import os

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d']

def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=10, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        ## CIFAR10: kernel_size 7 -> 3, stride 2 -> 1, padding 3->1
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        ## END

        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)

        return x


def _resnet(arch, block, layers, pretrained, progress, device, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        script_dir = os.path.dirname(__file__)
        state_dict = torch.load(script_dir + '/state_dicts/'+arch+'.pt', map_location=device)
        model.load_state_dict(state_dict)
    return model


def resnet18(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, device,
                   **kwargs)


def resnet34(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, device,
                   **kwargs)


def resnet50(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, device,
                   **kwargs)


def resnet101(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, device,
                   **kwargs)


def resnet152(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, device,
                   **kwargs)


def resnext50_32x4d(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNeXt-50 32x4d model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, device, **kwargs)


def resnext101_32x8d(pretrained=False, progress=True, device='cpu', **kwargs):
    """Constructs a ResNeXt-101 32x8d model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, device, **kwargs)

@@ -5,7 +5,7 @@ import math
import torch.nn as nn
import torch.nn.functional as F

from archai.supergraph.nas.models.shakeshake.shakeshake import ShakeShake, Shortcut
from .shakeshake import ShakeShake, Shortcut


class ShakeBlock(nn.Module):

@@ -5,7 +5,7 @@ import math
import torch.nn as nn
import torch.nn.functional as F

from archai.supergraph.nas.models.shakeshake.shakeshake import ShakeShake, Shortcut
from .shakeshake import ShakeShake, Shortcut


class ShakeBottleNeck(nn.Module):

@@ -95,9 +95,9 @@ class Evaluater(EnforceOverrides):
        # TODO: below detection code is too week, need to improve, possibly encode image size in yaml and use that instead
        if dataset_name.startswith('cifar'):
            if function_name.startswith('res'): # support resnext as well
                module_name = 'archai.cifar10_models.resnet'
                module_name = 'archai.supergraph.models.resnet'
            elif function_name.startswith('dense'):
                module_name = 'archai.cifar10_models.densenet'
                module_name = 'archai.supergraph.models.densenet'
        elif dataset_name.startswith('imagenet') or dataset_name.startswith('sport8'):
            module_name = 'torchvision.models'
        if not module_name:

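The module_name chosen above is resolved dynamically into a model factory; the usual Python pattern for turning a dotted module name plus a function name into a callable is importlib-based. A hedged sketch of that pattern (not necessarily Evaluater's exact code; the names below are illustrative):

import importlib

module_name = 'archai.supergraph.models.resnet'   # as selected in the hunk above
function_name = 'resnet34'                        # hypothetical example

module = importlib.import_module(module_name)     # load the module by its dotted path
model_factory = getattr(module, function_name)    # look up the constructor by name
model = model_factory(num_classes=10)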
@@ -3,8 +3,6 @@ __include__: 'darts.yaml' # defaults are loaded from this file
common:
  #yaml_log: False
  apex:
    enabled: False # global switch to disable everything apex
    distributed_enabled: False # enable/disable distributed mode
  ray:
    enabled: True # initialize ray. Note: ray cannot be used if apex distributed is enabled
    local_mode: False # if True then ray runs in serial mode

@@ -12,50 +10,31 @@ common:
nas:
  eval:
    final_desc_foldername: '$expdir/model_desc_gallery' #
    source_desc_foldername: '$expdir/model_desc_gallery'
    model_desc:
      n_reductions: 2 # number of reductions to be applied
      n_cells: 10 # number of max cells, for pareto frontier, we use cell_count_scale to multiply cells and limit by n_cells
      aux_weight: 0.0 # weight for loss from auxiliary towers in test time arch
      n_cells: 20 # number of max cells, for pareto frontier, we use cell_count_scale to multiply cells and limit by n_cells
      aux_weight: 0.4 # weight for loss from auxiliary towers in test time arch
      num_edges_to_sample: 2 # number of edges each node will take inputs from
      aux_tower_stride: 3
      model_stems:
        ops: ['stem_conv3x3_s2', 'stem_conv3x3_s2']
        init_node_ch: 32 # num of input/output channels for nodes in 1st cell
        stem_multiplier: 1 # output channels multiplier for the stem
      init_node_ch: 36 # num of input/output channels for nodes in 1st cell
      cell:
        n_nodes: 5 # number of nodes in a cell if template desc is not provided
        cell_post_op: 'proj_channels'
    petridish:
      cell_count_scale: 1.0 # for eval first multiply number of cells used in search by this factor, limit to n_cells
    trainer:
      aux_weight: 0.0
      epochs: 1500
      batch_chunks: 1
      validation:
        batch_chunks: 1
      optimizer:
        lr: 0.033
    loader:
      cutout: 6 # cutout length, use cutout augmentation when > 0
      load_train: True # load train split of dataset
      train_batch: 32
      test_batch: 32
      img_size: 16
      aug: 'autoaug_cifar10'
      # dataset:
      #   max_batches: 32

      epochs: 600
  search:
    final_desc_foldername: '$expdir/model_desc_gallery' # the gallery of models that eval will train from scratch
    petridish:
      convex_hull_eps: 0.025 # tolerance
      max_madd: 20000000 # if any parent model reaches this many multiply-additions then the search is terminated or it reaches maximum number of parent pool size
      max_madd: 200000000 # if any parent model reaches this many multiply-additions then the search is terminated or it reaches maximum number of parent pool size
      max_hull_points: 100 # if the pool of parent models reaches this size then search is terminated or if it reaches max multiply-adds
      checkpoints_foldername: '$expdir/petridish_search_checkpoints'
      search_iters: 4
    pareto:
      max_cells: 10
      max_reductions: 2
      max_cells: 8
      max_reductions: 3
      max_nodes: 3
      enabled: True # if false then there will only be one seed model. if true a number of seed models with different number of cells, reductions and nodes will be used to initialize the search. this provides more coverage of the frontier.
    model_desc:

@@ -63,52 +42,21 @@ nas:
      n_reductions: 1
      num_edges_to_sample: 2 # number of edges each node will take inputs from
      cell:
        n_nodes: 1 # also used as min nodes to get combinations for seeding pareto
        n_nodes: 1
        cell_post_op: 'proj_channels'
      model_stems:
        ops: ['stem_conv3x3_s2', 'stem_conv3x3_s2']
        stem_multiplier: 1 # output channels multiplier for the stem
        init_node_ch: 32 # num of input/output channels for nodes in 1st cell
    seed_train:
      trainer:
        epochs: 80 # number of epochs model will be trained before search
        optimizer:
          lr: 0.033
        batch_chunks: 1
        validation:
          batch_chunks: 1
      loader:
        cutout: 6
        train_batch: 32
        test_batch: 32
        img_size: 16
        aug: ''
        # dataset:
        #   max_batches: 32
        train_batch: 128
    post_train:
      trainer:
        epochs: 80 # number of epochs model will be trained after search
        optimizer:
          lr: 0.033
      loader:
        train_batch: 32
        cutout: 6
        test_batch: 32
        img_size: 16
        aug: ''
        # dataset:
        #   max_batches: 32
        train_batch: 96
    trainer:
      l1_alphas: 0.001 # as per paper
      epochs: 80 # number of epochs model will be trained during search
      optimizer:
        lr: 0.033
    loader:
      train_batch: 32
      val_ratio: 0.2 #split portion for train set, 0 to 1
      cutout: 6
      test_batch: 32
      img_size: 16
      aug: ''
      # dataset:
      #   max_batches: 32
      train_batch: 96
      val_ratio: 0.2 #split portion for test set, 0 to 1