# Mirror of https://github.com/microsoft/esvit.git
# 183 lines, 8.9 KiB, Python
import argparse
import os
import shutil
import subprocess
import sys
import time

import utils

# Command-line interface of the launcher. Most options are forwarded unchanged
# to main_esvit.py by the launch code further down in this file; the
# "distributed training" group configures the launcher itself.
# NOTE(review): utils.bool_flag is a project helper; presumably it parses
# textual booleans such as "true"/"false" -- confirm against utils.py.
parser = argparse.ArgumentParser(description="PyTorch Efficient Self-supervised Training")

parser.add_argument('--cfg', type=str,
                    help='experiment configure file name')

# Model parameters
parser.add_argument(
    '--arch', default='deit_small', type=str,
    choices=['swin_tiny', 'swin_small', 'swin_base', 'swin_large', 'swin',
             'vil', 'vil_1281', 'vil_2262', 'vil_14121',
             'deit_tiny', 'deit_small', 'vit_base'],
    help="Name of architecture to train. For quick experiments with ViTs, "
         "we recommend using deit_tiny or deit_small.")
parser.add_argument(
    '--norm_last_layer', default=True, type=utils.bool_flag,
    help="Whether or not to weight normalize the last layer of the DINO head. "
         "Not normalizing leads to better performance but can make the training unstable. "
         "In our experiments, we typically set this parameter to False with deit_small "
         "and True with vit_base.")
parser.add_argument(
    '--use_dense_prediction', default=False, type=utils.bool_flag,
    help="Whether to use dense prediction in projection head (Default: False)")
parser.add_argument(
    '--teacher_temp', default=0.04, type=float,
    help="Final value (after linear warmup) of the teacher temperature. "
         "For most experiments, anything above 0.07 is unstable. We recommend "
         "starting with the default value of 0.04 and increase this slightly if needed.")
# The help text previously advertised a default of 30 while the real default is 0.
parser.add_argument(
    '--warmup_teacher_temp_epochs', default=0, type=int,
    help='Number of warmup epochs for the teacher temperature (Default: 0).')
parser.add_argument(
    '--batch_size_per_gpu', default=64, type=int,
    help='Per-GPU batch-size : number of distinct images loaded on one GPU.')
parser.add_argument(
    '--epochs', default=100, type=int,
    help='Number of epochs of training.')
# The help text previously carried a stray double quote and a line
# continuation that embedded raw indentation into the message.
parser.add_argument(
    '--aug-opt', type=str, default='dino_aug', metavar='NAME',
    help='Use different data augmentation policy. '
         '[deit_aug, dino_aug, mocov2_aug, basic_aug] (default: dino_aug)')
parser.add_argument(
    '--zip_mode', type=utils.bool_flag, default=False,
    help="Whether or not to use zip file.")
parser.add_argument(
    '--data_path', default='/path/to/imagenet/train/', type=str,
    help='Please specify path to the ImageNet training data.')
parser.add_argument(
    '--output_dir', default=".", type=str,
    help='Path to save logs and checkpoints.')
parser.add_argument(
    '--pretrained_weights_ckpt', default='.', type=str,
    help="Path to pretrained weights to evaluate.")

parser.add_argument(
    "--warmup_epochs", default=10, type=int,
    help="Number of epochs for the linear learning-rate warm up.")

# Dataset
parser.add_argument(
    '--dataset', default="imagenet1k", type=str,
    help='Pre-training dataset.')
parser.add_argument(
    '--tsv_mode', type=utils.bool_flag, default=False,
    help="Whether or not to use tsv file.")
parser.add_argument(
    '--sampler', default="distributed", type=str,
    help='Sampler for dataloader.')

parser.add_argument(
    '--use_mixup', type=utils.bool_flag, default=False,
    help="Whether or not to use mixup/mixcut for self-supervised learning.")
parser.add_argument(
    '--num_mixup_views', type=int, default=10,
    help="Number of views to apply mixup/mixcut ")

# distributed training
parser.add_argument(
    "--num_nodes", default=1, type=int,
    help="number of nodes for training")
parser.add_argument(
    "--num_gpus_per_node", default=8, type=int,
    help="passed as --nproc_per_node parameter")
# In this script the value is only multiplied into the "batch size" that is
# printed after argument parsing; it is not forwarded to main_esvit.py.
parser.add_argument(
    "--samples_per_gpu", default=1, type=int,
    help="batch size for training")
parser.add_argument(
    "--node_rank", default=-1, type=int,
    help="node rank, should be in [0, num_nodes)")

# job meta info
parser.add_argument(
    "--job_name", default="", type=str,
    help="job name")

# Parse the command line and echo the resulting namespace into the job log.
args = parser.parse_args()
print(args)

# When OMPI_COMM_WORLD_SIZE is present in the environment it takes precedence
# over --num_nodes; otherwise the user-supplied value is only sanity-checked.
ompi_world_size = os.environ.get("OMPI_COMM_WORLD_SIZE")
if ompi_world_size is None:
    assert args.num_nodes > 0, "number of nodes should be larger than 0!!!"
else:
    args.num_nodes = int(ompi_world_size)
print("number of nodes: ", args.num_nodes)

# Informational only: product of samples per GPU, node count and GPUs per node.
imgs_per_batch = args.samples_per_gpu * args.num_nodes * args.num_gpus_per_node
print("batch size: ", imgs_per_batch)

def _build_launch_command(args, node_rank, master_addr, master_port):
    """Return the argv list that starts main_esvit.py via torch.distributed.launch.

    Args:
        args: Parsed launcher namespace; supplies the launcher settings
            (num_gpus_per_node, num_nodes) and every option forwarded to
            main_esvit.py.
        node_rank: Rank of this node, passed as --node_rank.
        master_addr: Address of the master process, passed as --master_addr.
        master_port: Port of the master process, passed as --master_port.

    Returns:
        A list of strings suitable for subprocess.run without a shell.
    """
    command = [
        # Reuse the interpreter running this launcher instead of whatever
        # "python" happens to resolve to on PATH.
        sys.executable, "-m", "torch.distributed.launch",
        "--nproc_per_node={}".format(args.num_gpus_per_node),
        "--nnodes", str(args.num_nodes),
        "--node_rank", str(node_rank),
        "--master_addr", str(master_addr),
        "--master_port", str(master_port),
        "main_esvit.py",
    ]
    # (flag, value) pairs forwarded to main_esvit.py, in the original order.
    forwarded = [
        ("--data_path", args.data_path),
        ("--output_dir", args.output_dir),
        ("--batch_size_per_gpu", args.batch_size_per_gpu),
        ("--arch", args.arch),
        ("--zip_mode", args.zip_mode),
        ("--epochs", args.epochs),
        ("--teacher_temp", args.teacher_temp),
        ("--warmup_teacher_temp_epochs", args.warmup_teacher_temp_epochs),
        ("--norm_last_layer", args.norm_last_layer),
        ("--cfg", args.cfg),
        ("--use_dense_prediction", args.use_dense_prediction),
        ("--use_mixup", args.use_mixup),
        ("--num_mixup_views", args.num_mixup_views),
        ("--dataset", args.dataset),
        ("--tsv_mode", args.tsv_mode),
        ("--sampler", args.sampler),
        ("--warmup_epochs", args.warmup_epochs),
        ("--pretrained_weights_ckpt", args.pretrained_weights_ckpt),
        ("--aug-opt", args.aug_opt),
    ]
    for flag, value in forwarded:
        # --cfg has no default; omit an unset option rather than forwarding
        # the literal text "None".
        if value is None:
            continue
        # str() yields the same text the former "{...}".format() template did
        # (e.g. "False", "0.04").
        command.extend([flag, str(value)])
    return command


if args.num_nodes > 1:
    # Under Open MPI the rank comes from the environment; otherwise keep the
    # value given on the command line.
    if 'OMPI_COMM_WORLD_RANK' in os.environ:
        args.node_rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
    print("node rank: ", args.node_rank)
    # get ip address and port for master process, which the other processes
    # will use to communicate; both must be exported for multi-node runs
    master_addr = os.environ['MASTER_ADDR']
    master_port = os.environ['MASTER_PORT']
    print("master address-port: {}-{}".format(master_addr, master_port))
else:
    # Single node: this branch used to reference master_addr/master_port
    # without ever assigning them (NameError) and passed the default
    # --node_rank of -1. Fall back to torch.distributed.launch's own defaults
    # while still honouring values exported in the environment.
    if args.node_rank < 0:
        args.node_rank = 0
    master_addr = os.environ.get('MASTER_ADDR', '127.0.0.1')
    master_port = os.environ.get('MASTER_PORT', '29500')

cmd = _build_launch_command(args, args.node_rank, master_addr, master_port)

# Run without a shell so paths containing spaces or shell metacharacters are
# passed through intact; check=True raises CalledProcessError on failure.
subprocess.run(cmd, check=True)