Clean up the test_snakes.py and run_snakes_training.py files

This commit is contained in:
soumya ranjan 2019-06-03 12:20:46 +05:30
Родитель 92c393b5ec
Коммит f3feb3072c
2 изменённых файлов: 92 добавлений и 81 удалений

Просмотреть файл

@ -3,6 +3,7 @@ import shutil
import subprocess
import torch
def main():
# Output directory for all the models after successful training
output_dir = 'result/snakes'
@ -13,98 +14,99 @@ def main():
'--print_freq', '100',
'--label_smoothing', '0.15',
'--use_onevsall_loss']
# Name tags for the different models that we will train
tags = []
# The run specific parameters, should correspond to the order in TAGS
params = []
### Preparing the training configurations
# Preparing the training configurations
# For each model training, we define a tag and the parameters
tags.append('resnext_224_init')
params.append(['--model_type', 'resnext101',
'--image_size', '224',
'--epochs', '4',
'--epoch_decay', '2',
'--lr_decay', '0.5',
'--lr', '0.01',
'--warm_up_iterations', '0',
'--train_logits_only',
'--batch_size', '128',
'--fp16'])
params.append(['--model_type', 'resnext101',
'--image_size', '224',
'--epochs', '4',
'--epoch_decay', '2',
'--lr_decay', '0.5',
'--lr', '0.01',
'--warm_up_iterations', '0',
'--train_logits_only',
'--batch_size', '128',
'--fp16'])
tags.append('resnext_224')
params.append(['--model_type', 'resnext101',
'--image_size', '224',
'--epochs', '50',
'--epoch_decay', '5',
'--lr_decay', '0.5',
'--lr', '0.01',
'--warm_up_iterations', '50',
'--batch_size', '128',
'--fp16',
'--resume', get_best_model_path(output_dir, 'resnext_224_init')])
params.append(['--model_type', 'resnext101',
'--image_size', '224',
'--epochs', '50',
'--epoch_decay', '5',
'--lr_decay', '0.5',
'--lr', '0.01',
'--warm_up_iterations', '50',
'--batch_size', '128',
'--fp16',
'--resume', get_best_model_path(output_dir, 'resnext_224_init')])
tags.append('resnext_448')
params.append(['--model_type', 'resnext101',
'--image_size', '448',
'--start_epoch', '0',
'--epochs', '30',
'--epoch_decay', '5',
'--lr_decay', '0.7',
'--lr', '0.005',
'--warm_up_iterations', '10',
'--batch_size', '32',
'--fp16',
'--resume', get_best_model_path(output_dir, 'resnext_224')])
'--image_size', '448',
'--start_epoch', '0',
'--epochs', '30',
'--epoch_decay', '5',
'--lr_decay', '0.7',
'--lr', '0.005',
'--warm_up_iterations', '10',
'--batch_size', '32',
'--fp16',
'--resume', get_best_model_path(output_dir, 'resnext_224')])
# Inceptionv4
# tags.append('inc4_299_init')
# params.append(['--model_type', 'inceptionv4',
# '--image_size', '299',
# '--epochs', '4',
# '--epoch_decay', '2',
# '--lr_decay', '0.94',
# '--lr', '0.05',
# '--warm_up_iterations', '0',
# '--train_logits_only',
# '--batch_size', '128',
# '--fp16'])
tags.append('inc4_299_init')
params.append(['--model_type', 'inceptionv4',
'--image_size', '299',
'--epochs', '4',
'--epoch_decay', '2',
'--lr_decay', '0.94',
'--lr', '0.05',
'--warm_up_iterations', '0',
'--train_logits_only',
'--batch_size', '128',
'--fp16'])
# tags.append('inc4_299')
# params.append(['--model_type', 'inceptionv4',
# '--image_size', '299',
# '--epochs', '25',
# '--epoch_decay', '4',
# '--lr_decay', '0.94',
# '--lr', '0.005',
# '--warm_up_iterations', '10',
# '--batch_size', '128',
# '--fp16',
# '--resume', get_best_model_path(output_dir, 'inc4_299_init')])
tags.append('inc4_299')
params.append(['--model_type', 'inceptionv4',
'--image_size', '299',
'--epochs', '25',
'--epoch_decay', '4',
'--lr_decay', '0.94',
'--lr', '0.005',
'--warm_up_iterations', '10',
'--batch_size', '128',
'--fp16',
'--resume', get_best_model_path(output_dir, 'inc4_299_init')])
# tags.append('inc4_488')
# params.append(['--model_type', 'inceptionv4',
# '--image_size', '488',
# '--epochs', '50',
# '--epoch_decay', '4',
# '--lr_decay', '0.94',
# '--lr', '0.005',
# '--warm_up_iterations', '20',
# '--batch_size', '32',
# '--fp16',
# '--resume', get_best_model_path(output_dir, 'inc4_299')])
tags.append('inc4_488')
params.append(['--model_type', 'inceptionv4',
'--image_size', '488',
'--epochs', '50',
'--epoch_decay', '4',
'--lr_decay', '0.94',
'--lr', '0.005',
'--warm_up_iterations', '20',
'--batch_size', '32',
'--fp16',
'--resume', get_best_model_path(output_dir, 'inc4_299')])
# Checking if everything is set up properly
assert len(tags) == len(params)
### The actual training
# The actual training
for tag, param in zip(tags, params):
print('Starting training of', tag)
result_dir = get_result_dir(output_dir, tag)
model_best = get_best_model_path(output_dir, tag)
if os.path.isfile(model_best):
print('Found existing trained model at {}, skipping the training of {}'.format(model_best, tag))
print('Found existing trained model at {}, skipping the training of {}'.format(
model_best, tag))
else:
# Check for checkpoint
checkpoint_file = 'checkpoint.pth.tar'
@ -112,14 +114,16 @@ def main():
resume_param = ['--resume', checkpoint_file]
else:
resume_param = []
subprocess.run(['python',
'-m', 'torch.distributed.launch',
'--nproc_per_node={}'.format(torch.cuda.device_count()),
'train.py']
subprocess.run(['python',
'-m', 'torch.distributed.launch',
'--nproc_per_node={}'.format(
torch.cuda.device_count()),
'train.py']
+ param + shared_params + resume_param, check=True)
assert os.path.isfile('model_best.pth.tar'), 'ERROR: The training did not produce model_best.pth.tar, ' + \
'You might need to adjust learning parameters.'
print('Seems training finished, moving trained models and log directory to', result_dir)
print(
'Seems training finished, moving trained models and log directory to', result_dir)
os.makedirs(result_dir, exist_ok=True)
shutil.move('model_best.pth.tar', result_dir)
shutil.move('checkpoint.pth.tar', result_dir)
@ -130,9 +134,11 @@ def get_result_dir(output_dir, tag):
''' Returns the directory, where we will store all models and logs after successful training '''
return os.path.join(output_dir, tag)
def get_best_model_path(output_dir, tag):
    ''' Returns the location of 'model_best.pth.tar' inside the result directory of the given tag '''
    result_dir = get_result_dir(output_dir, tag)
    best_model_file = 'model_best.pth.tar'
    return os.path.join(result_dir, best_model_file)
if __name__ == '__main__':
main()

Просмотреть файл

@ -9,6 +9,10 @@ from data_loader import ImageLoader
from models import *
# Image size handed to both ClassificationModel (in get_model) and TestDataset (in main).
IMAGE_SIZES = 488
# Folder holding the test images that main() wraps in a TestDataset.
TEST_FOLDER = 'data/round1'
# Path of the trained model file; same value main() previously hard-coded for model_path.
MODEL_PATH = 'result/snakes/inc4_488/model_best.pth.tar'
# CSV file that main() writes the results to and that the post-processing steps rewrite.
SAVE_TO = 'inc4_488_test_result.csv'
def get_model(model_path):
    ''' Builds a ClassificationModel from model_path, using IMAGE_SIZES and with the GPU enabled '''
    classifier = ClassificationModel(
        model_path,
        image_sizes=IMAGE_SIZES,
        useGPU=True,
    )
    return classifier
@ -39,30 +43,31 @@ def sort_columns(filename):
cols = cols[:1] + sorted(cols[1:])
df = df.reindex(columns=cols)
df.to_csv(filename, index=False)
def fill_corrupted_files(filename, folder, fill_value=1.0 / 90):
    ''' Appends placeholder rows to a result CSV for files that have no row in it.

    Every entry of ``folder`` whose name is absent from the CSV's ``filename``
    column gets one new row in which all remaining columns are set to
    ``fill_value``. The CSV is rewritten in place.

    Args:
        filename: Path of the CSV file; its first column must be ``filename``.
        folder: Directory whose entry names are compared against the CSV.
        fill_value: Value written into every non-filename column of an added
            row. Defaults to 1/90, the constant the original code used.
            NOTE(review): with 45 columns this sums to 0.5 per row, not 1;
            confirm this is intended.
    '''
    df = pd.read_csv(filename)
    test_imgs = [path.name for path in Path(folder).iterdir()]
    # Sorted so the appended rows come out in a deterministic order; a bare
    # set difference has no stable ordering.
    corrupted_imgs = sorted(set(test_imgs) - set(df['filename']))
    if not corrupted_imgs:
        # Nothing is missing, so the CSV is already complete.
        return

    # Size the filler from the data instead of assuming exactly 44 missing
    # files and 45 columns, which made DataFrame.insert fail on any other
    # count. The builtin-aliased np.float no longer exists in current NumPy.
    score_columns = df.columns[1:]
    filler = np.full((len(corrupted_imgs), len(score_columns)),
                     fill_value, dtype=np.float64)
    dummy_df = pd.DataFrame(filler, columns=score_columns)
    dummy_df.insert(loc=0, column='filename', value=corrupted_imgs)

    df = pd.concat([df, dummy_df])
    df.to_csv(filename, index=False)
def main():
# create the test loader
test_folder = 'data/round1'
test_folder = TEST_FOLDER
test_data = TestDataset(test_folder, IMAGE_SIZES)
test_loader = data.DataLoader(test_data, batch_size=180, shuffle=False,
num_workers=4, pin_memory=True)
# load the model
model_path = 'result/snakes/inc4_488/model_best.pth.tar'
model_path = MODEL_PATH
model = get_model(model_path)
model.eval()
@ -91,14 +96,14 @@ def main():
columns=['filename'] +
list(map(lambda x: x[1], sorted(classnames.items(),
key=lambda x: x[0]))))
test_df.to_csv('test_result.csv', index=False)
test_df.to_csv(SAVE_TO, index=False)
if __name__ == '__main__':
    # Three steps on the same result file: write it, sort its columns, then
    # add rows for the images missing from it.
    print('Run the model on test set...\n\n')
    main()
    print('Sorting the columns...\n\n')
    # Only SAVE_TO is written by main(); the former 'test_result.csv' name
    # is no longer produced and must not be post-processed.
    sort_columns(SAVE_TO)
    print('Filling in the corrupted images...\n\n')
    # Use the same folder constant main() reads, so the two cannot drift apart.
    fill_corrupted_files(SAVE_TO, TEST_FOLDER)
    print('Done!')