This commit is contained in:
hobogalaxy 2021-03-05 23:09:21 +01:00
Родитель 497515e40d
Коммит 3249fb45ab
6 изменённых файлов: 121 добавлений и 77 удалений

Просмотреть файл

@ -35,3 +35,13 @@ hydra:
sweep:
dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S}
subdir: ${hydra.job.num}
job:
env_set:
# currently there are some issues with running sweeps alongside wandb
# https://github.com/wandb/client/issues/1314
# this env variable fixes that
WANDB_START_METHOD: thread
# ignore python warnings
# PYTHONWARNINGS: ignore

Просмотреть файл

@ -3,7 +3,7 @@ gpus: 0 # set -1 to train on all GPUs available, set 0 to train on CPU only
min_epochs: 1
max_epochs: 10
gradient_clip_val: 0.5
num_sanity_val_steps: 3
num_sanity_val_steps: 2
progress_bar_refresh_rate: 20
weights_summary: null
default_root_dir: "lightning_logs/"

Просмотреть файл

@ -1,21 +0,0 @@
# TESTS FOR DIFFERENT LOGGERS
# TO EXECUTE:
# bash tests/logger_tests.sh
# conda activate testenv
# Test CSV logger
echo TEST 1
python train.py logger=csv_logger trainer.min_epochs=3 trainer.max_epochs=3 trainer.gpus=1
# Test Weights & Biases logger
echo TEST 2
python train.py logger=wandb logger.wandb.project="env_tests" trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1
# Test TensorBoard logger
echo TEST 3
python train.py logger=tensorboard trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1
# Test many loggers at once
echo TEST 4
python train.py logger=many_loggers trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1

Просмотреть файл

@ -1,44 +0,0 @@
# THESE ARE JUST A COUPLE OF QUICK EXPERIMENTS TO TEST IF YOUR MODEL DOESN'T CRASH UNDER DIFFERENT CONDITIONS
# TO EXECUTE:
# bash tests/quick_tests.sh
# conda activate testenv
export PYTHONWARNINGS="ignore"
# Print "$1" centred in a yellow banner of '=' characters sized to the terminal width.
# Arguments:
#   $1 - title to display in the middle of the banner
print_test_name() {
    local left right
    # tput exits non-zero when it cannot determine the terminal (e.g. TERM is unset or
    # tput is missing). An empty width would make the precisions below negative, and
    # printf then prints the whole padding string instead of trimming it.
    termwidth="$(tput cols)" || termwidth=80
    case "$termwidth" in
        '' | *[!0-9]*) termwidth=80 ;;
    esac
    # 500 '=' characters; the printf precisions below cut this down to the needed length
    padding="$(printf '%0.1s' ={1..500})"
    left=$(( (termwidth - 2 - ${#1}) / 2 ))
    right=$(( (termwidth - 1 - ${#1}) / 2 ))
    # A title wider than the terminal would also yield a negative precision; print no padding then
    if (( left < 0 )); then left=0; fi
    if (( right < 0 )); then right=0; fi
    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$left" "$padding" "$1" 0 "$right" "$padding"
}
# Test for CPU
print_test_name "TEST 1"
python train.py trainer.gpus=0 trainer.max_epochs=1 print_config=false
# Test for GPU
print_test_name "TEST 2"
python train.py trainer.gpus=1 trainer.max_epochs=1 print_config=false
# Test multiple workers and cuda pinned memory
print_test_name "TEST 3"
python train.py trainer.gpus=1 trainer.max_epochs=2 print_config=false\
datamodule.num_workers=4 datamodule.pin_memory=True
# Test all experiment configs
print_test_name "TEST 4"
python train.py -m '+experiment=glob(*)' trainer.gpus=1 trainer.max_epochs=3 print_config=false
# Test with debug trainer
print_test_name "TEST 5"
python train.py trainer=debug_trainer print_config=false
# Overfit to 10 batches
print_test_name "TEST 6"
python train.py trainer.min_epochs=20 trainer.max_epochs=20 +trainer.overfit_batches=10 print_config=false
# Test default hydra sweep over hyperparameters (runs 4 different combinations for 1 epoch)
print_test_name "TEST 7"
python train.py -m datamodule.batch_size=32,64 model.lr=0.001,0.003 print_config=false \
trainer.gpus=1 trainer.max_epochs=1

97
tests/smoke_tests.sh Normal file
Просмотреть файл

@ -0,0 +1,97 @@
#!/bin/bash
# These are just a couple of quick experiments to check that your model doesn't crash under different conditions
# To execute:
# bash tests/smoke_tests.sh
# Function for printing a test name.
# NOTE: this deliberately shadows the shell's `echo` for the rest of the script, so
# every `echo "TEST n"` below prints "$1" centred in a yellow banner of '=' characters
# sized to the terminal width. Only the first argument is printed.
echo() {
    local left right
    # tput exits non-zero when it cannot determine the terminal (e.g. TERM is unset or
    # tput is missing). An empty width would make the precisions below negative, and
    # printf then prints the whole padding string instead of trimming it.
    termwidth="$(tput cols)" || termwidth=80
    case "$termwidth" in
        '' | *[!0-9]*) termwidth=80 ;;
    esac
    # 500 '=' characters; the printf precisions below cut this down to the needed length
    padding="$(printf '%0.1s' ={1..500})"
    left=$(( (termwidth - 2 - ${#1}) / 2 ))
    right=$(( (termwidth - 1 - ${#1}) / 2 ))
    # A title wider than the terminal would also yield a negative precision; print no padding then
    if (( left < 0 )); then left=0; fi
    if (( right < 0 )); then right=0; fi
    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$left" "$padding" "$1" 0 "$right" "$padding"
}
# Smoke tests 1-9: each command launches train.py with a different set of command-line
# overrides. `echo` here is the banner function defined above, not the shell builtin.
# print_config=false is passed to every run (presumably to keep the output short - confirm in train.py).
# Make python hide warnings
export PYTHONWARNINGS="ignore"
# Test fast_dev_run (runs for 1 train, 1 val and 1 test batch)
echo "TEST 1"
python train.py +trainer.fast_dev_run=True \
print_config=false
# Overfit to 10 batches (min_epochs and max_epochs are both set to 20)
echo "TEST 2"
python train.py +trainer.overfit_batches=10 \
trainer.min_epochs=20 trainer.max_epochs=20 \
print_config=false
# Test 1 epoch on CPU (trainer.gpus=0)
echo "TEST 3"
python train.py trainer.gpus=0 trainer.max_epochs=1 \
print_config=false
# Test 1 epoch on GPU (trainer.gpus=1)
echo "TEST 4"
python train.py trainer.gpus=1 trainer.max_epochs=1 \
print_config=false
# Test 1 epoch on 25% of the train, val and test data
echo "TEST 5"
python train.py trainer.max_epochs=1 \
+trainer.limit_train_batches=0.25 +trainer.limit_val_batches=0.25 +trainer.limit_test_batches=0.25 \
print_config=false
# Test 1 epoch on 15 train batches, 10 val batches, 5 test batches
echo "TEST 6"
python train.py trainer.max_epochs=1 \
+trainer.limit_train_batches=15 +trainer.limit_val_batches=10 +trainer.limit_test_batches=5 \
print_config=false
# Test all experiment configs (multirun over every config matched by glob(*), 2 epochs each on GPU)
echo "TEST 7"
python train.py -m '+experiment=glob(*)' trainer.gpus=1 trainer.max_epochs=2 \
print_config=false
# Test default hydra sweep over hyperparameters (runs 4 different combinations with fast_dev_run)
echo "TEST 8"
python train.py -m datamodule.batch_size=32,64 model.lr=0.001,0.003 \
+trainer.fast_dev_run=True \
print_config=false
# Test multiple dataloader workers and cuda pinned memory (2 epochs on GPU)
echo "TEST 9"
python train.py trainer.gpus=1 trainer.max_epochs=2 \
datamodule.num_workers=4 datamodule.pin_memory=True \
print_config=false
# Tests 10-12 pass extra Trainer flags. They are scoped under `trainer` and appended
# with `+`, like the other extra Trainer flags in this script (+trainer.fast_dev_run,
# +trainer.limit_train_batches, ...); a root-level override such as `precision=16`
# does not target the trainer config.
# NOTE(review): assumes these keys are not already defined in the trainer config;
# if one is, drop the `+` for that key.
# Test 16 bit precision
echo "TEST 10"
python train.py trainer.gpus=1 trainer.max_epochs=1 +trainer.precision=16 \
print_config=false
# Test gradient accumulation
echo "TEST 11"
python train.py trainer.gpus=1 trainer.max_epochs=1 +trainer.accumulate_grad_batches=10 \
print_config=false
# Test running validation loop twice per epoch
echo "TEST 12"
python train.py trainer.gpus=1 trainer.max_epochs=2 +trainer.val_check_interval=0.5 \
print_config=false
# Logger tests: select a logger config and run on GPU with min_epochs and max_epochs both set to 5.
# Test CSV logger (5 epochs)
echo "TEST 13"
python train.py logger=csv_logger trainer.min_epochs=5 trainer.max_epochs=5 trainer.gpus=1 \
print_config=false
# Test TensorBoard logger (5 epochs)
echo "TEST 14"
python train.py logger=tensorboard trainer.min_epochs=5 trainer.max_epochs=5 trainer.gpus=1 \
print_config=false
# Test mixed-precision training (apex backend, optimization level O2)
# Both AMP flags are appended under `trainer`, like the other extra Trainer flags in
# this script; previously they were root-level overrides and `amp_level` lacked the `+`.
# NOTE(review): assumes neither key is already defined in the trainer config, and that
# apex is installed in the test environment - confirm.
echo "TEST 15"
python train.py trainer.gpus=1 trainer.max_epochs=3 \
+trainer.amp_backend='apex' +trainer.amp_level='O2' \
print_config=false

Просмотреть файл

@ -1,18 +1,20 @@
# TESTS FOR HYPERPARAMETER SWEEPS
# TO EXECUTE:
#!/bin/bash
# Test hyperparameter sweeps
# To execute:
# bash tests/sweep_tests.sh
# conda activate testenv
# currently there are some issues with running sweeps alongside wandb
# https://github.com/wandb/client/issues/1314
# this env variable fixes that
export WANDB_START_METHOD=thread
# Print "$1" centred in a yellow banner of '=' characters sized to the terminal width.
# NOTE: this deliberately shadows the shell's `echo` for the rest of the script; only
# the first argument is printed.
echo() {
    local left right
    # tput exits non-zero when it cannot determine the terminal (e.g. TERM is unset or
    # tput is missing). An empty width would make the precisions below negative, and
    # printf then prints the whole padding string instead of trimming it.
    termwidth="$(tput cols)" || termwidth=80
    case "$termwidth" in
        '' | *[!0-9]*) termwidth=80 ;;
    esac
    # 500 '=' characters; the printf precisions below cut this down to the needed length
    padding="$(printf '%0.1s' ={1..500})"
    left=$(( (termwidth - 2 - ${#1}) / 2 ))
    right=$(( (termwidth - 1 - ${#1}) / 2 ))
    # A title wider than the terminal would also yield a negative precision; print no padding then
    if (( left < 0 )); then left=0; fi
    if (( right < 0 )); then right=0; fi
    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$left" "$padding" "$1" 0 "$right" "$padding"
}
# Make python hide warnings
export PYTHONWARNINGS="ignore"
# Test default hydra sweep with wandb logging
echo TEST 1
echo "TEST 1"
python train.py -m datamodule.batch_size=64,128 model.lr=0.001,0.003 \
+experiment=exp_example_simple \
trainer.gpus=1 trainer.max_epochs=2 seed=12345 \
@ -20,7 +22,7 @@ datamodule.num_workers=12 datamodule.pin_memory=True \
logger=wandb logger.wandb.project="env_tests" logger.wandb.group="DefaultSweep_MNIST_SimpleDenseNet"
# Test optuna sweep with wandb logging
echo TEST 2
echo "TEST 2"
python train.py -m --config-name config_optuna.yaml \
+experiment=exp_example_simple \
trainer.gpus=1 trainer.max_epochs=5 seed=12345 \