All-No_pareto functional again

This commit is contained in:
Shital Shah 2023-01-21 02:41:49 -08:00
Родитель fd1fb0ab44
Коммит 096b564983
24 изменённых файлов: 280 добавлений и 511 удалений

Просмотреть файл

@ -214,7 +214,7 @@ class ApexUtils:
else:
return val
def _get_optim(self, multi_optim:MultiOptim)->Optimizer:
def _get_one_optim(self, multi_optim:MultiOptim)->Optimizer:
assert len(multi_optim)==1, \
'Mixed precision is only supported for one optimizer' \
f' but {len(multi_optim)} optimizers were supplied'
@ -234,7 +234,10 @@ class ApexUtils:
def step(self, multi_optim:MultiOptim)->None:
if self.is_mixed():
self._scaler.step(self._get_optim(multi_optim)) # pyright: ignore[reportOptionalMemberAccess]
# self._scaler.unscale_ will be called automatically if it isn't called yet from grad clipping
# https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler.step
for optim_shed in multi_optim:
self._scaler.step(optim_shed.optim) # pyright: ignore[reportOptionalMemberAccess]
self._scaler.update() # pyright: ignore[reportOptionalMemberAccess]
else:
multi_optim.step()
@ -249,12 +252,13 @@ class ApexUtils:
model = model.to(self.device)
# scale LR
optim = self._get_optim(multi_optim)
if self.is_dist() and self._scale_lr:
lr = ml_utils.get_optim_lr(optim)
scaled_lr = lr * self.world_size / float(batch_size)
ml_utils.set_optim_lr(optim, scaled_lr)
self._log_info({'lr_scaled': True, 'old_lr': lr, 'new_lr': scaled_lr})
for optim_shed in multi_optim:
optim = optim_shed.optim
lr = ml_utils.get_optim_lr(optim)
scaled_lr = lr * self.world_size / float(batch_size)
ml_utils.set_optim_lr(optim, scaled_lr)
self._log_info({'lr_scaled': True, 'old_lr': lr, 'new_lr': scaled_lr})
if self.is_dist():
model = DistributedDataParallel(model, device_ids=[self._gpu], output_device=self._gpu)
@ -264,8 +268,8 @@ class ApexUtils:
def clip_grad(self, clip:float, model:nn.Module, multi_optim:MultiOptim)->None:
if clip > 0.0:
if self.is_mixed():
optim = self._get_optim(multi_optim)
self._scaler.unscale_(optim) # pyright: ignore[reportOptionalMemberAccess]
# https://pytorch.org/docs/stable/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers
self._scaler.unscale_(multi_optim[0].optim) # pyright: ignore[reportOptionalMemberAccess]
nn.utils.clip_grad_norm_(model.parameters(), clip)
else:
nn.utils.clip_grad_norm_(model.parameters(), clip)

Просмотреть файл

@ -20,8 +20,8 @@ from archai.supergraph.nas.model import Model
from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.supergraph.datasets import data
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.supergraph.algos.darts.bilevel_optimizer import BilevelOptimizer
class BilevelArchTrainer(ArchTrainer):

Просмотреть файл

@ -12,8 +12,8 @@ from torch.optim.optimizer import Optimizer
from archai.common.config import Config
from archai.common import utils
from archai.supergraph.nas.model import Model
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.common.utils import zip_eq
from archai.supergraph.utils import ml_utils

Просмотреть файл

@ -12,8 +12,8 @@ from torch.optim.optimizer import Optimizer
from archai.common.config import Config
from archai.common import utils
from archai.supergraph.nas.model import Model
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.common.utils import zip_eq
from archai.supergraph.utils import ml_utils

Просмотреть файл

@ -19,8 +19,8 @@ from archai.common import utils
from archai.supergraph.nas.model import Model
from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.supergraph.utils.multi_optim import MultiOptim, OptimSched
class DidartsArchTrainer(ArchTrainer):

Просмотреть файл

@ -10,8 +10,8 @@ from torch import nn
import numpy as np
from archai.common.common import get_conf
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.supergraph.datasets.data import get_data
from archai.supergraph.nas.model import Model
from archai.supergraph.nas.cell import Cell

Просмотреть файл

@ -14,8 +14,8 @@ import os
from archai.common.common import get_conf
from archai.common.common import get_expdir
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.supergraph.datasets.data import get_data
from archai.supergraph.nas.model import Model
from archai.supergraph.nas.cell import Cell

Просмотреть файл

@ -20,8 +20,8 @@ from archai.common import utils
from archai.supergraph.nas.model import Model
from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.common.common import get_conf
from archai.supergraph.algos.gumbelsoftmax.gs_op import GsOp

Просмотреть файл

@ -11,8 +11,8 @@ import os
from archai.common.common import get_conf
from archai.common.common import get_expdir
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.supergraph.datasets.data import get_data
from archai.supergraph.nas.model import Model
from archai.supergraph.nas.cell import Cell

Просмотреть файл

@ -17,8 +17,8 @@ from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.trainer import Trainer
from archai.common.config import Config
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.supergraph.datasets import data
from archai.supergraph.nas.model_desc import ModelDesc
from archai.supergraph.nas.model_desc_builder import ModelDescBuilder

Просмотреть файл

@ -11,8 +11,8 @@ from overrides import overrides
from torch.utils.data.dataloader import DataLoader
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.common.config import Config
from archai.supergraph.nas.model_desc_builder import ModelDescBuilder

Просмотреть файл

@ -77,12 +77,9 @@ class PetridishOp(Op):
'avg_pool_3x3',
'skip_connect', # identity
'sep_conv_3x3',
#'sep_conv_5x5',
'sep_conv_5x5',
'dil_conv_3x3',
#'dil_conv_5x5',
'mbconv_r3',
'mbconv_r2',
'mbconv_r1',
'dil_conv_5x5',
'none' # this must be at the end so top1 doesn't chose it
]
@ -204,3 +201,4 @@ class PetridishOp(Op):
# we store alphas in a plain list so PyTorch doesn't register them
self._alphas = list(self.arch_params().paramlist_by_kind('alphas'))
assert len(self._alphas)==1

Просмотреть файл

@ -18,8 +18,8 @@ import matplotlib.pyplot as plt
from archai.supergraph.nas.model_desc import ConvMacroParams, CellDesc, CellType, OpDesc, \
EdgeDesc, TensorShape, TensorShapes, NodeDesc, ModelDesc
from archai.supergraph.utils.metrics import Metrics
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.common import utils
class JobStage(Enum):

Просмотреть файл

@ -25,8 +25,8 @@ from torch.utils.data.dataloader import DataLoader
import yaml
from archai.common import common
from archai.common.logger import Logger
logger = Logger(source=__name__),
from archai.common.common import logger
from archai.common.common import CommonState
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.common.config import Config

Просмотреть файл

@ -21,8 +21,8 @@ from archai.supergraph.nas.model import Model
from archai.supergraph.nas.model_desc import CellType
from archai.supergraph.utils import ml_utils
from archai.supergraph.utils.checkpoint import CheckPoint
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger
from archai.supergraph.datasets import data
from archai.common.common import get_conf
from archai.supergraph.algos.xnas.xnas_op import XnasOp

Просмотреть файл

@ -11,9 +11,9 @@ from torchvision.transforms import transforms
from torch.utils.data.dataset import Dataset
from torchvision.datasets.utils import check_integrity, download_url
from archai.common.utils import download_and_extract_tar, extract_tar
from archai.common.logger import Logger
logger = Logger(source=__name__)
from archai.common.common import logger

Просмотреть файл

@ -4,19 +4,18 @@ from torch import nn
from torch.nn import DataParallel
# from torchvision import models
from archai.supergraph.nas.models.resnet import ResNet
from archai.supergraph.nas.models.pyramidnet import PyramidNet
from archai.supergraph.nas.models.shakeshake.shake_resnet import ShakeResNet
from archai.supergraph.nas.models.wideresnet import WideResNet
from archai.supergraph.nas.models.shakeshake.shake_resnext import ShakeResNeXt
from .pyramidnet import PyramidNet
from .shakeshake.shake_resnet import ShakeResNet
from .wideresnet import WideResNet
from .shakeshake.shake_resnext import ShakeResNeXt
from archai.supergraph.nas.models.mobilenetv2 import *
from archai.supergraph.nas.models.resnet_cifar10 import *
from archai.supergraph.nas.models.vgg import *
from archai.supergraph.nas.models.densenet import *
from archai.supergraph.nas.models.resnet_orig import *
from archai.supergraph.nas.models.googlenet import *
from archai.supergraph.nas.models.inception import *
from .mobilenetv2 import *
from .resnet import *
from .vgg import *
from .densenet import *
from .resnet_orig import *
from .googlenet import *
from .inception import *
def get_model(conf, num_class=10):

Просмотреть файл

@ -2,7 +2,7 @@ import torch
import torch.nn as nn
import math
from archai.supergraph.nas.models.shakedrop import ShakeDrop
from .shakedrop import ShakeDrop
def conv3x3(in_planes, out_planes, stride=1):

Просмотреть файл

@ -1,31 +1,44 @@
# Original code: https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
import torch
import torch.nn as nn
import math
import os
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d']
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
identity = x
out = self.conv1(x)
out = self.bn1(out)
@ -35,9 +48,9 @@ class BasicBlock(nn.Module):
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
identity = self.downsample(x)
out += residual
out += identity
out = self.relu(out)
return out
@ -46,22 +59,25 @@ class BasicBlock(nn.Module):
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * Bottleneck.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * Bottleneck.expansion)
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
identity = x
out = self.conv1(x)
out = self.bn1(out)
@ -73,108 +89,199 @@ class Bottleneck(nn.Module):
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, dataset, depth, n_classes, bottleneck=False):
def __init__(self, block, layers, num_classes=10, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None):
super(ResNet, self).__init__()
self.dataset = dataset
if self.dataset.startswith('cifar'):
self.inplanes = 16
#logger.info(bottleneck)
if bottleneck == True:
n = int((depth - 2) / 9)
block = Bottleneck
else:
n = int((depth - 2) / 6)
block = BasicBlock
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.layer1 = self._make_layer(block, 16, n)
self.layer2 = self._make_layer(block, 32, n, stride=2)
self.layer3 = self._make_layer(block, 64, n, stride=2)
# self.avgpool = nn.AvgPool2d(8)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(64 * block.expansion, n_classes)
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
elif dataset == 'imagenet':
blocks ={18: BasicBlock, 34: BasicBlock, 50: Bottleneck, 101: Bottleneck, 152: Bottleneck, 200: Bottleneck}
layers ={18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3], 200: [3, 24, 36, 3]}
assert layers[depth], 'invalid detph for ResNet (depth should be one of 18, 34, 50, 101, 152, and 200)'
## CIFAR10: kernel_size 7 -> 3, stride 2 -> 1, padding 3->1
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
## END
self.inplanes = 64
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(blocks[depth], 64, layers[depth][0])
self.layer2 = self._make_layer(blocks[depth], 128, layers[depth][1], stride=2)
self.layer3 = self._make_layer(blocks[depth], 256, layers[depth][2], stride=2)
self.layer4 = self._make_layer(blocks[depth], 512, layers[depth][3], stride=2)
# self.avgpool = nn.AvgPool2d(7)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * blocks[depth].expansion, n_classes)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def _make_layer(self, block, planes, blocks, stride=1):
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def forward(self, x):
if self.dataset == 'cifar10' or self.dataset == 'cifar100':
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
elif self.dataset == 'imagenet':
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
x = self.avgpool(x)
x = x.reshape(x.size(0), -1)
x = self.fc(x)
return x
def _resnet(arch, block, layers, pretrained, progress, device, **kwargs):
    """Build a ``ResNet`` and optionally load weights stored next to this module.

    Args:
        arch (str): architecture name; also the stem of the weights file
            (``state_dicts/<arch>.pt``).
        block: residual block class passed to ``ResNet``.
        layers: per-stage block counts passed to ``ResNet``.
        pretrained (bool): if True, load the state dict from the local
            ``state_dicts`` folder into the model.
        progress (bool): accepted but not used by this function.
        device: ``map_location`` given to ``torch.load``.
        **kwargs: forwarded to the ``ResNet`` constructor.

    Returns:
        The constructed (and possibly weight-loaded) ``ResNet``.
    """
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        # Resolve the module directory to an absolute path first: with a bare
        # relative ``__file__`` the directory part is empty, and plain string
        # concatenation would then point at ``/state_dicts/...`` on the
        # filesystem root instead of the folder beside this file.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        weights_path = os.path.join(script_dir, 'state_dicts', arch + '.pt')
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        state_dict = torch.load(weights_path, map_location=device)
        model.load_state_dict(state_dict)
    return model
def resnet18(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-18: ``BasicBlock`` with stage depths [2, 2, 2, 2].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet18.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [2, 2, 2, 2]
    return _resnet('resnet18', BasicBlock, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnet34(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-34: ``BasicBlock`` with stage depths [3, 4, 6, 3].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet34.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnet34', BasicBlock, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnet50(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-50: ``Bottleneck`` with stage depths [3, 4, 6, 3].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet50.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnet50', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnet101(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-101: ``Bottleneck`` with stage depths [3, 4, 23, 3].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet101.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [3, 4, 23, 3]
    return _resnet('resnet101', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnet152(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-152: ``Bottleneck`` with stage depths [3, 8, 36, 3].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet152.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [3, 8, 36, 3]
    return _resnet('resnet152', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnext50_32x4d(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNeXt-50 32x4d: ``Bottleneck`` with stage depths [3, 4, 6, 3].

    ``groups`` is forced to 32 and ``width_per_group`` to 4, overriding any
    values the caller supplied for those two keys.

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            ``state_dicts/resnext50_32x4d.pt`` beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    kwargs.update(groups=32, width_per_group=4)
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnext50_32x4d', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNeXt-101 32x8d: ``Bottleneck`` with stage depths [3, 4, 23, 3].

    ``groups`` is forced to 32 and ``width_per_group`` to 8, overriding any
    values the caller supplied for those two keys.

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            ``state_dicts/resnext101_32x8d.pt`` beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    kwargs.update(groups=32, width_per_group=8)
    stage_depths = [3, 4, 23, 3]
    return _resnet('resnext101_32x8d', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)

Просмотреть файл

@ -1,287 +0,0 @@
import torch
import torch.nn as nn
import os
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d']
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Create a bias-free 3x3 ``nn.Conv2d``.

    Padding is set equal to ``dilation``; ``stride``, ``groups`` and
    ``dilation`` are passed straight through to the convolution.
    """
    conv_options = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_options)
def conv1x1(in_planes, out_planes, stride=1):
    """Create a bias-free 1x1 ``nn.Conv2d`` with the given stride."""
    pointwise = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by a norm layer.

    The first convolution carries the stride; the optional ``downsample``
    module is applied to the input so it can be added to the branch output.
    """

    # Output channels are ``planes * expansion``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        """Create the block.

        Args:
            inplanes: input channel count.
            planes: channel count of both convolutions.
            stride: stride of the first convolution.
            downsample: optional module applied to the input before the add.
            groups: must be 1.
            base_width: must be 64.
            dilation: must not exceed 1.
            norm_layer: callable building a norm layer from a channel count;
                defaults to ``nn.BatchNorm2d``.

        Raises:
            ValueError: if ``groups != 1`` or ``base_width != 64``.
            NotImplementedError: if ``dilation > 1``.
        """
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # conv1 and the downsample module both reduce resolution when stride != 1.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = make_norm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run conv-norm-relu-conv-norm, add the shortcut, then apply ReLU."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)
class Bottleneck(nn.Module):
    """Residual block: 1x1 reduce, 3x3 (strided, grouped, dilated), 1x1 expand.

    The optional ``downsample`` module is applied to the input so it can be
    added to the branch output.
    """

    # Output channels are ``planes * expansion``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        """Create the block.

        Args:
            inplanes: input channel count.
            planes: base channel count; the block outputs ``planes * expansion``.
            stride: stride of the 3x3 convolution.
            downsample: optional module applied to the input before the add.
            groups: group count of the 3x3 convolution.
            base_width: scales the inner width as ``planes * base_width / 64``.
            dilation: dilation of the 3x3 convolution.
            norm_layer: callable building a norm layer from a channel count;
                defaults to ``nn.BatchNorm2d``.
        """
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        inner_width = int(planes * (base_width / 64.)) * groups
        expanded = planes * self.expansion
        # conv2 and the downsample module both reduce resolution when stride != 1.
        self.conv1 = conv1x1(inplanes, inner_width)
        self.bn1 = make_norm(inner_width)
        self.conv2 = conv3x3(inner_width, inner_width, stride, groups, dilation)
        self.bn2 = make_norm(inner_width)
        self.conv3 = conv1x1(inner_width, expanded)
        self.bn3 = make_norm(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv-norm stages, add the shortcut, then apply ReLU."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))

        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)
class ResNet(nn.Module):
    """ResNet with a 3x3 stride-1 stem convolution followed by four residual stages.

    The stem differs from the 7x7 stride-2 original (kernel 7 -> 3,
    stride 2 -> 1, padding 3 -> 1, labelled "CIFAR10" in the code); the
    max-pool after the stem is kept.
    """

    def __init__(self, block, layers, num_classes=10, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        """Build the network.

        Args:
            block: residual block class (exposes ``expansion``).
            layers: four block counts, one per stage.
            num_classes: output size of the final linear layer.
            zero_init_residual: if True, zero the weight of the last norm
                layer of every ``Bottleneck``/``BasicBlock``.
            groups: group count handed to each block.
            width_per_group: base width handed to each block.
            replace_stride_with_dilation: ``None`` or three flags; each one
                swaps the stride of stage 2/3/4 for dilation.
            norm_layer: callable building a norm layer from a channel count;
                defaults to ``nn.BatchNorm2d``.

        Raises:
            ValueError: if ``replace_stride_with_dilation`` does not have
                exactly three elements.
        """
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # One flag per strided stage: True swaps the 2x2 stride for a
            # dilated convolution.
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        # CIFAR10 stem: kernel_size 7 -> 3, stride 2 -> 1, padding 3 -> 1.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        dilate_2, dilate_3, dilate_4 = (replace_stride_with_dilation[0],
                                        replace_stride_with_dilation[1],
                                        replace_stride_with_dilation[2])
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=dilate_2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=dilate_3)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=dilate_4)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        norm_types = (nn.BatchNorm2d, nn.GroupNorm)
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, norm_types):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        # Zero-initialize the last BN in each residual branch so the branch
        # starts at zero and every block initially behaves like an identity.
        # This improves the model by 0.2~0.3% according to
        # https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    nn.init.constant_(module.bn3.weight, 0)
                elif isinstance(module, BasicBlock):
                    nn.init.constant_(module.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Assemble one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and the shortcut module.
        With ``dilate`` set, the stride is folded into ``self.dilation`` and
        the stage runs at stride 1. Updates ``self.inplanes`` to the stage's
        output channel count.
        """
        norm_layer = self._norm_layer
        first_block_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1

        out_channels = planes * block.expansion
        shortcut = None
        if stride != 1 or self.inplanes != out_channels:
            shortcut = nn.Sequential(
                conv1x1(self.inplanes, out_channels, stride),
                norm_layer(out_channels),
            )

        stage = [block(self.inplanes, planes, stride, shortcut, self.groups,
                       self.base_width, first_block_dilation, norm_layer)]
        self.inplanes = out_channels
        stage.extend(
            block(self.inplanes, planes, groups=self.groups,
                  base_width=self.base_width, dilation=self.dilation,
                  norm_layer=norm_layer)
            for _ in range(1, blocks)
        )
        return nn.Sequential(*stage)

    def forward(self, x):
        """Apply stem, max-pool, four stages, global pooling and the classifier."""
        pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4,
                    self.avgpool)
        for stage in pipeline:
            x = stage(x)

        flat = x.reshape(x.size(0), -1)
        return self.fc(flat)
def _resnet(arch, block, layers, pretrained, progress, device, **kwargs):
    """Build a ``ResNet`` and optionally load weights stored next to this module.

    Args:
        arch (str): architecture name; also the stem of the weights file
            (``state_dicts/<arch>.pt``).
        block: residual block class passed to ``ResNet``.
        layers: per-stage block counts passed to ``ResNet``.
        pretrained (bool): if True, load the state dict from the local
            ``state_dicts`` folder into the model.
        progress (bool): accepted but not used by this function.
        device: ``map_location`` given to ``torch.load``.
        **kwargs: forwarded to the ``ResNet`` constructor.

    Returns:
        The constructed (and possibly weight-loaded) ``ResNet``.
    """
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        # Resolve the module directory to an absolute path first: with a bare
        # relative ``__file__`` the directory part is empty, and plain string
        # concatenation would then point at ``/state_dicts/...`` on the
        # filesystem root instead of the folder beside this file.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        weights_path = os.path.join(script_dir, 'state_dicts', arch + '.pt')
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        state_dict = torch.load(weights_path, map_location=device)
        model.load_state_dict(state_dict)
    return model
def resnet18(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-18: ``BasicBlock`` with stage depths [2, 2, 2, 2].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet18.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [2, 2, 2, 2]
    return _resnet('resnet18', BasicBlock, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnet34(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-34: ``BasicBlock`` with stage depths [3, 4, 6, 3].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet34.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnet34', BasicBlock, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnet50(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-50: ``Bottleneck`` with stage depths [3, 4, 6, 3].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet50.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnet50', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnet101(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-101: ``Bottleneck`` with stage depths [3, 4, 23, 3].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet101.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [3, 4, 23, 3]
    return _resnet('resnet101', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnet152(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNet-152: ``Bottleneck`` with stage depths [3, 8, 36, 3].

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            the ``state_dicts/resnet152.pt`` file beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    stage_depths = [3, 8, 36, 3]
    return _resnet('resnet152', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnext50_32x4d(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNeXt-50 32x4d: ``Bottleneck`` with stage depths [3, 4, 6, 3].

    ``groups`` is forced to 32 and ``width_per_group`` to 4, overriding any
    values the caller supplied for those two keys.

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            ``state_dicts/resnext50_32x4d.pt`` beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    kwargs.update(groups=32, width_per_group=4)
    stage_depths = [3, 4, 6, 3]
    return _resnet('resnext50_32x4d', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build a ResNeXt-101 32x8d: ``Bottleneck`` with stage depths [3, 4, 23, 3].

    ``groups`` is forced to 32 and ``width_per_group`` to 8, overriding any
    values the caller supplied for those two keys.

    Args:
        pretrained (bool): forwarded to ``_resnet``, which loads weights from
            ``state_dicts/resnext101_32x8d.pt`` beside this module when True.
        progress (bool): forwarded to ``_resnet``.
        device: forwarded to ``_resnet`` as the load location for the weights.
        **kwargs: extra keyword arguments forwarded to ``_resnet``.
    """
    kwargs.update(groups=32, width_per_group=8)
    stage_depths = [3, 4, 23, 3]
    return _resnet('resnext101_32x8d', Bottleneck, stage_depths,
                   pretrained, progress, device, **kwargs)

Просмотреть файл

@ -5,7 +5,7 @@ import math
import torch.nn as nn
import torch.nn.functional as F
from archai.supergraph.nas.models.shakeshake.shakeshake import ShakeShake, Shortcut
from .shakeshake import ShakeShake, Shortcut
class ShakeBlock(nn.Module):

Просмотреть файл

@ -5,7 +5,7 @@ import math
import torch.nn as nn
import torch.nn.functional as F
from archai.supergraph.nas.models.shakeshake.shakeshake import ShakeShake, Shortcut
from .shakeshake import ShakeShake, Shortcut
class ShakeBottleNeck(nn.Module):

Просмотреть файл

@ -95,9 +95,9 @@ class Evaluater(EnforceOverrides):
# TODO: the detection code below is too weak and needs improvement; possibly encode the image size in YAML and use that instead
if dataset_name.startswith('cifar'):
if function_name.startswith('res'): # support resnext as well
module_name = 'archai.cifar10_models.resnet'
module_name = 'archai.supergraph.models.resnet'
elif function_name.startswith('dense'):
module_name = 'archai.cifar10_models.densenet'
module_name = 'archai.supergraph.models.densenet'
elif dataset_name.startswith('imagenet') or dataset_name.startswith('sport8'):
module_name = 'torchvision.models'
if not module_name:

Просмотреть файл

@ -3,8 +3,6 @@ __include__: 'darts.yaml' # defaults are loaded from this file
common:
#yaml_log: False
apex:
enabled: False # global switch to disable everything apex
distributed_enabled: False # enable/disable distributed mode
ray:
enabled: True # initialize ray. Note: ray cannot be used if apex distributed is enabled
local_mode: False # if True then ray runs in serial mode
@ -12,50 +10,31 @@ common:
nas:
eval:
final_desc_foldername: '$expdir/model_desc_gallery' #
source_desc_foldername: '$expdir/model_desc_gallery'
model_desc:
n_reductions: 2 # number of reductions to be applied
n_cells: 10 # number of max cells, for pareto frontier, we use cell_count_scale to multiply cells and limit by n_cells
aux_weight: 0.0 # weight for loss from auxiliary towers in test time arch
n_cells: 20 # number of max cells, for pareto frontier, we use cell_count_scale to multiply cells and limit by n_cells
aux_weight: 0.4 # weight for loss from auxiliary towers in test time arch
num_edges_to_sample: 2 # number of edges each node will take inputs from
aux_tower_stride: 3
model_stems:
ops: ['stem_conv3x3_s2', 'stem_conv3x3_s2']
init_node_ch: 32 # num of input/output channels for nodes in 1st cell
stem_multiplier: 1 # output channels multiplier for the stem
init_node_ch: 36 # num of input/output channels for nodes in 1st cell
cell:
n_nodes: 5 # number of nodes in a cell if template desc is not provided
cell_post_op: 'proj_channels'
petridish:
cell_count_scale: 1.0 # for eval first multiply number of cells used in search by this factor, limit to n_cells
trainer:
aux_weight: 0.0
epochs: 1500
batch_chunks: 1
validation:
batch_chunks: 1
optimizer:
lr: 0.033
loader:
cutout: 6 # cutout length, use cutout augmentation when > 0
load_train: True # load train split of dataset
train_batch: 32
test_batch: 32
img_size: 16
aug: 'autoaug_cifar10'
# dataset:
# max_batches: 32
epochs: 600
search:
final_desc_foldername: '$expdir/model_desc_gallery' # the gallery of models that eval will train from scratch
petridish:
convex_hull_eps: 0.025 # tolerance
max_madd: 20000000 # if any parent model reaches this many multiply-additions then the search is terminated or it reaches maximum number of parent pool size
max_madd: 200000000 # if any parent model reaches this many multiply-additions then the search is terminated or it reaches maximum number of parent pool size
max_hull_points: 100 # if the pool of parent models reaches this size then search is terminated or if it reaches max multiply-adds
checkpoints_foldername: '$expdir/petridish_search_checkpoints'
search_iters: 4
pareto:
max_cells: 10
max_reductions: 2
max_cells: 8
max_reductions: 3
max_nodes: 3
enabled: True # if false then there will only be one seed model. if true a number of seed models with different number of cells, reductions and nodes will be used to initialize the search. this provides more coverage of the frontier.
model_desc:
@ -63,52 +42,21 @@ nas:
n_reductions: 1
num_edges_to_sample: 2 # number of edges each node will take inputs from
cell:
n_nodes: 1 # also used as min nodes to get combinations for seeding pareto
n_nodes: 1
cell_post_op: 'proj_channels'
model_stems:
ops: ['stem_conv3x3_s2', 'stem_conv3x3_s2']
stem_multiplier: 1 # output channels multiplier for the stem
init_node_ch: 32 # num of input/output channels for nodes in 1st cell
seed_train:
trainer:
epochs: 80 # number of epochs model will be trained before search
optimizer:
lr: 0.033
batch_chunks: 1
validation:
batch_chunks: 1
loader:
cutout: 6
train_batch: 32
test_batch: 32
img_size: 16
aug: ''
# dataset:
# max_batches: 32
train_batch: 128
post_train:
trainer:
epochs: 80 # number of epochs model will be trained after search
optimizer:
lr: 0.033
loader:
train_batch: 32
cutout: 6
test_batch: 32
img_size: 16
aug: ''
# dataset:
# max_batches: 32
train_batch: 96
trainer:
l1_alphas: 0.001 # as per paper
epochs: 80 # number of epochs model will be trained during search
optimizer:
lr: 0.033
loader:
train_batch: 32
val_ratio: 0.2 #split portion for train set, 0 to 1
cutout: 6
test_batch: 32
img_size: 16
aug: ''
# dataset:
# max_batches: 32
train_batch: 96
val_ratio: 0.2 #split portion for test set, 0 to 1