Clean up the test_snakes.py and run_snakes_training.py files
This commit is contained in:
Родитель
92c393b5ec
Коммит
f3feb3072c
|
@ -3,6 +3,7 @@ import shutil
|
|||
import subprocess
|
||||
import torch
|
||||
|
||||
|
||||
def main():
|
||||
# Output directory for all the models after successful training
|
||||
output_dir = 'result/snakes'
|
||||
|
@ -13,98 +14,99 @@ def main():
|
|||
'--print_freq', '100',
|
||||
'--label_smoothing', '0.15',
|
||||
'--use_onevsall_loss']
|
||||
|
||||
|
||||
# Name tags for the different models that we will train
|
||||
tags = []
|
||||
# The run specific parameters, should correspond to the order in TAGS
|
||||
params = []
|
||||
|
||||
### Preparing the training configurations
|
||||
# Preparing the training configurations
|
||||
# For each model training, we define a tag and the parameters
|
||||
tags.append('resnext_224_init')
|
||||
params.append(['--model_type', 'resnext101',
|
||||
'--image_size', '224',
|
||||
'--epochs', '4',
|
||||
'--epoch_decay', '2',
|
||||
'--lr_decay', '0.5',
|
||||
'--lr', '0.01',
|
||||
'--warm_up_iterations', '0',
|
||||
'--train_logits_only',
|
||||
'--batch_size', '128',
|
||||
'--fp16'])
|
||||
params.append(['--model_type', 'resnext101',
|
||||
'--image_size', '224',
|
||||
'--epochs', '4',
|
||||
'--epoch_decay', '2',
|
||||
'--lr_decay', '0.5',
|
||||
'--lr', '0.01',
|
||||
'--warm_up_iterations', '0',
|
||||
'--train_logits_only',
|
||||
'--batch_size', '128',
|
||||
'--fp16'])
|
||||
|
||||
tags.append('resnext_224')
|
||||
params.append(['--model_type', 'resnext101',
|
||||
'--image_size', '224',
|
||||
'--epochs', '50',
|
||||
'--epoch_decay', '5',
|
||||
'--lr_decay', '0.5',
|
||||
'--lr', '0.01',
|
||||
'--warm_up_iterations', '50',
|
||||
'--batch_size', '128',
|
||||
'--fp16',
|
||||
'--resume', get_best_model_path(output_dir, 'resnext_224_init')])
|
||||
params.append(['--model_type', 'resnext101',
|
||||
'--image_size', '224',
|
||||
'--epochs', '50',
|
||||
'--epoch_decay', '5',
|
||||
'--lr_decay', '0.5',
|
||||
'--lr', '0.01',
|
||||
'--warm_up_iterations', '50',
|
||||
'--batch_size', '128',
|
||||
'--fp16',
|
||||
'--resume', get_best_model_path(output_dir, 'resnext_224_init')])
|
||||
|
||||
tags.append('resnext_448')
|
||||
params.append(['--model_type', 'resnext101',
|
||||
'--image_size', '448',
|
||||
'--start_epoch', '0',
|
||||
'--epochs', '30',
|
||||
'--epoch_decay', '5',
|
||||
'--lr_decay', '0.7',
|
||||
'--lr', '0.005',
|
||||
'--warm_up_iterations', '10',
|
||||
'--batch_size', '32',
|
||||
'--fp16',
|
||||
'--resume', get_best_model_path(output_dir, 'resnext_224')])
|
||||
'--image_size', '448',
|
||||
'--start_epoch', '0',
|
||||
'--epochs', '30',
|
||||
'--epoch_decay', '5',
|
||||
'--lr_decay', '0.7',
|
||||
'--lr', '0.005',
|
||||
'--warm_up_iterations', '10',
|
||||
'--batch_size', '32',
|
||||
'--fp16',
|
||||
'--resume', get_best_model_path(output_dir, 'resnext_224')])
|
||||
|
||||
# Inceptionv4
|
||||
# tags.append('inc4_299_init')
|
||||
# params.append(['--model_type', 'inceptionv4',
|
||||
# '--image_size', '299',
|
||||
# '--epochs', '4',
|
||||
# '--epoch_decay', '2',
|
||||
# '--lr_decay', '0.94',
|
||||
# '--lr', '0.05',
|
||||
# '--warm_up_iterations', '0',
|
||||
# '--train_logits_only',
|
||||
# '--batch_size', '128',
|
||||
# '--fp16'])
|
||||
tags.append('inc4_299_init')
|
||||
params.append(['--model_type', 'inceptionv4',
|
||||
'--image_size', '299',
|
||||
'--epochs', '4',
|
||||
'--epoch_decay', '2',
|
||||
'--lr_decay', '0.94',
|
||||
'--lr', '0.05',
|
||||
'--warm_up_iterations', '0',
|
||||
'--train_logits_only',
|
||||
'--batch_size', '128',
|
||||
'--fp16'])
|
||||
|
||||
# tags.append('inc4_299')
|
||||
# params.append(['--model_type', 'inceptionv4',
|
||||
# '--image_size', '299',
|
||||
# '--epochs', '25',
|
||||
# '--epoch_decay', '4',
|
||||
# '--lr_decay', '0.94',
|
||||
# '--lr', '0.005',
|
||||
# '--warm_up_iterations', '10',
|
||||
# '--batch_size', '128',
|
||||
# '--fp16',
|
||||
# '--resume', get_best_model_path(output_dir, 'inc4_299_init')])
|
||||
tags.append('inc4_299')
|
||||
params.append(['--model_type', 'inceptionv4',
|
||||
'--image_size', '299',
|
||||
'--epochs', '25',
|
||||
'--epoch_decay', '4',
|
||||
'--lr_decay', '0.94',
|
||||
'--lr', '0.005',
|
||||
'--warm_up_iterations', '10',
|
||||
'--batch_size', '128',
|
||||
'--fp16',
|
||||
'--resume', get_best_model_path(output_dir, 'inc4_299_init')])
|
||||
|
||||
# tags.append('inc4_488')
|
||||
# params.append(['--model_type', 'inceptionv4',
|
||||
# '--image_size', '488',
|
||||
# '--epochs', '50',
|
||||
# '--epoch_decay', '4',
|
||||
# '--lr_decay', '0.94',
|
||||
# '--lr', '0.005',
|
||||
# '--warm_up_iterations', '20',
|
||||
# '--batch_size', '32',
|
||||
# '--fp16',
|
||||
# '--resume', get_best_model_path(output_dir, 'inc4_299')])
|
||||
tags.append('inc4_488')
|
||||
params.append(['--model_type', 'inceptionv4',
|
||||
'--image_size', '488',
|
||||
'--epochs', '50',
|
||||
'--epoch_decay', '4',
|
||||
'--lr_decay', '0.94',
|
||||
'--lr', '0.005',
|
||||
'--warm_up_iterations', '20',
|
||||
'--batch_size', '32',
|
||||
'--fp16',
|
||||
'--resume', get_best_model_path(output_dir, 'inc4_299')])
|
||||
|
||||
# Checking if everything is set up properly
|
||||
assert len(tags) == len(params)
|
||||
|
||||
### The actual training
|
||||
# The actual training
|
||||
for tag, param in zip(tags, params):
|
||||
print('Starting training of', tag)
|
||||
result_dir = get_result_dir(output_dir, tag)
|
||||
model_best = get_best_model_path(output_dir, tag)
|
||||
if os.path.isfile(model_best):
|
||||
print('Found existing trained model at {}, skipping the training of {}'.format(model_best, tag))
|
||||
print('Found existing trained model at {}, skipping the training of {}'.format(
|
||||
model_best, tag))
|
||||
else:
|
||||
# Check for checkpoint
|
||||
checkpoint_file = 'checkpoint.pth.tar'
|
||||
|
@ -112,14 +114,16 @@ def main():
|
|||
resume_param = ['--resume', checkpoint_file]
|
||||
else:
|
||||
resume_param = []
|
||||
subprocess.run(['python',
|
||||
'-m', 'torch.distributed.launch',
|
||||
'--nproc_per_node={}'.format(torch.cuda.device_count()),
|
||||
'train.py']
|
||||
subprocess.run(['python',
|
||||
'-m', 'torch.distributed.launch',
|
||||
'--nproc_per_node={}'.format(
|
||||
torch.cuda.device_count()),
|
||||
'train.py']
|
||||
+ param + shared_params + resume_param, check=True)
|
||||
assert os.path.isfile('model_best.pth.tar'), 'ERROR: The training did not produce model_best.pth.tar, ' + \
|
||||
'You might need to adjust learning parameters.'
|
||||
print('Seems training finished, moving trained models and log directory to', result_dir)
|
||||
print(
|
||||
'Seems training finished, moving trained models and log directory to', result_dir)
|
||||
os.makedirs(result_dir, exist_ok=True)
|
||||
shutil.move('model_best.pth.tar', result_dir)
|
||||
shutil.move('checkpoint.pth.tar', result_dir)
|
||||
|
@ -130,9 +134,11 @@ def get_result_dir(output_dir, tag):
|
|||
''' Returns the directory, where we will store all models and logs after successful training '''
|
||||
return os.path.join(output_dir, tag)
|
||||
|
||||
|
||||
def get_best_model_path(output_dir, tag):
    """Return the path of the best-model file for the training run ``tag``.

    The file is named ``model_best.pth.tar`` and sits inside the directory
    that ``get_result_dir(output_dir, tag)`` returns.
    """
    run_dir = get_result_dir(output_dir, tag)
    return os.path.join(run_dir, 'model_best.pth.tar')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -9,6 +9,10 @@ from data_loader import ImageLoader
|
|||
from models import *
|
||||
|
||||
IMAGE_SIZES = 488
|
||||
TEST_FOLDER = 'data/round1'
|
||||
MODEL_PATH = 'result/snakes/inc4_488/model_best.pth.tar'
|
||||
SAVE_TO = 'inc4_488_test_result.csv'
|
||||
|
||||
|
||||
def get_model(model_path):
    """Build the classifier used for test-set inference.

    Instantiates ``ClassificationModel`` for ``model_path`` with the
    module-level ``IMAGE_SIZES`` and with ``useGPU`` switched on.
    """
    classifier = ClassificationModel(
        model_path,
        image_sizes=IMAGE_SIZES,
        useGPU=True,
    )
    return classifier
|
||||
|
@ -39,30 +43,31 @@ def sort_columns(filename):
|
|||
cols = cols[:1] + sorted(cols[1:])
|
||||
df = df.reindex(columns=cols)
|
||||
df.to_csv(filename, index=False)
|
||||
|
||||
|
||||
|
||||
def fill_corrupted_files(filename, folder):
    """Append placeholder rows for images that have no row in the results CSV.

    Every entry name found directly inside ``folder`` that does not occur in
    the ``filename`` column of the CSV gets one new row whose score columns
    all hold the same constant. The CSV is then rewritten in place. When no
    entry is missing, the CSV is left untouched.

    Args:
        filename: Path of the results CSV. It must contain a ``filename``
            column; every column after the first is treated as a score column.
        folder: Directory whose entries are the expected test images
            (presumably the ones absent from the CSV could not be read
            during inference -- confirm against the data loader).
    """
    df = pd.read_csv(filename)
    expected = {entry.name for entry in Path(folder).iterdir()}
    # Sorted so the appended rows come out in a reproducible order.
    missing = sorted(expected - set(df.filename))
    if not missing:
        return

    score_columns = df.columns[1:]
    # Size the placeholder block from the data instead of a fixed (44, 45)
    # shape, and use the builtin float because the np.float alias no longer
    # exists in current NumPy.
    # NOTE(review): 1/90 is the constant the original code wrote; it is kept
    # as is -- confirm whether it should depend on the number of classes.
    filler = pd.DataFrame(
        np.full((len(missing), len(score_columns)), 1 / 90, dtype=float),
        columns=score_columns)
    filler.insert(loc=0, column='filename', value=missing)

    pd.concat([df, filler]).to_csv(filename, index=False)
|
||||
|
||||
|
||||
def main():
|
||||
# create the test loader
|
||||
test_folder = 'data/round1'
|
||||
test_folder = TEST_FOLDER
|
||||
test_data = TestDataset(test_folder, IMAGE_SIZES)
|
||||
test_loader = data.DataLoader(test_data, batch_size=180, shuffle=False,
|
||||
num_workers=4, pin_memory=True)
|
||||
|
||||
# load the model
|
||||
model_path = 'result/snakes/inc4_488/model_best.pth.tar'
|
||||
model_path =
|
||||
model = get_model(model_path)
|
||||
model.eval()
|
||||
|
||||
|
@ -91,14 +96,14 @@ def main():
|
|||
columns=['filename'] +
|
||||
list(map(lambda x: x[1], sorted(classnames.items(),
|
||||
key=lambda x: x[0]))))
|
||||
test_df.to_csv('test_result.csv', index=False)
|
||||
test_df.to_csv(SAVE_TO, index=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Three steps, all on the same output file (SAVE_TO): write the
    # predictions, reorder the CSV columns, then add rows for images that
    # are missing from the CSV.
    print('Run the model on test set...\n\n')
    main()
    print('Sorting the columns...\n\n')
    sort_columns(SAVE_TO)
    print('Filling in the corrupted images...\n\n')
    # Use the shared constant so this step scans the same folder main() reads.
    fill_corrupted_files(SAVE_TO, TEST_FOLDER)
    print('Done!')
|
||||
|
|
Загрузка…
Ссылка в новой задаче