improve smoke tests
Parent: 497515e40d
Commit: 3249fb45ab
@@ -35,3 +35,13 @@ hydra:
   sweep:
     dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S}
     subdir: ${hydra.job.num}
+
+  job:
+    env_set:
+      # currently there are some issues with running sweeps alongside wandb
+      # https://github.com/wandb/client/issues/1314
+      # this env variable fixes that
+      WANDB_START_METHOD: thread
+
+      # ignore python warnings
+      # PYTHONWARNINGS: ignore
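For context on the new `job.env_set` block: Hydra exports each listed variable into the job's environment at launch, so individual test scripts no longer have to set it themselves. A minimal sketch of the equivalent manual workaround from the shell (assuming the same `train.py` entry point used throughout this diff):

    export WANDB_START_METHOD=thread
    python train.py -m datamodule.batch_size=32,64 logger=wandb
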
@@ -3,7 +3,7 @@ gpus: 0 # set -1 to train on all GPUs available, set 0 to train on CPU only
 min_epochs: 1
 max_epochs: 10
 gradient_clip_val: 0.5
-num_sanity_val_steps: 3
+num_sanity_val_steps: 2
 progress_bar_refresh_rate: 20
 weights_summary: null
 default_root_dir: "lightning_logs/"
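All of these trainer defaults remain overridable per run from the command line, which is how the smoke tests below exercise them; for example, the new sanity-check default could be suppressed for a single run (a sketch using the same `trainer.*` keys as this config):

    python train.py trainer.num_sanity_val_steps=0 trainer.max_epochs=1
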
@@ -1,21 +0,0 @@
-# TESTS FOR DIFFERENT LOGGERS
-# TO EXECUTE:
-# bash tests/logger_tests.sh
-
-# conda activate testenv
-
-# Test CSV logger
-echo TEST 1
-python train.py logger=csv_logger trainer.min_epochs=3 trainer.max_epochs=3 trainer.gpus=1
-
-# Test Weights&Biases logger
-echo TEST 2
-python train.py logger=wandb logger.wandb.project="env_tests" trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1
-
-# Test TensorBoard logger
-echo TEST 3
-python train.py logger=tensorboard trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1
-
-# Test many loggers at once
-echo TEST 4
-python train.py logger=many_loggers trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1
@@ -1,44 +0,0 @@
-# THESE ARE JUST A COUPLE OF QUICK EXPERIMENTS TO TEST IF YOUR MODEL DOESN'T CRASH UNDER DIFFERENT CONDITIONS
-# TO EXECUTE:
-# bash tests/quick_tests.sh
-
-# conda activate testenv
-
-export PYTHONWARNINGS="ignore"
-
-print_test_name() {
-    termwidth="$(tput cols)"
-    padding="$(printf '%0.1s' ={1..500})"
-    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$(((termwidth-2-${#1})/2))" "$padding" "$1" 0 "$(((termwidth-1-${#1})/2))" "$padding"
-}
-
-
-# Test for CPU
-print_test_name "TEST 1"
-python train.py trainer.gpus=0 trainer.max_epochs=1 print_config=false
-
-# Test for GPU
-print_test_name "TEST 2"
-python train.py trainer.gpus=1 trainer.max_epochs=1 print_config=false
-
-# Test multiple workers and cuda pinned memory
-print_test_name "TEST 3"
-python train.py trainer.gpus=1 trainer.max_epochs=2 print_config=false \
-datamodule.num_workers=4 datamodule.pin_memory=True
-
-# Test all experiment configs
-print_test_name "TEST 4"
-python train.py -m '+experiment=glob(*)' trainer.gpus=1 trainer.max_epochs=3 print_config=false
-
-# Test with debug trainer
-print_test_name "TEST 5"
-python train.py trainer=debug_trainer print_config=false
-
-# Overfit to 10 batches
-print_test_name "TEST 6"
-python train.py trainer.min_epochs=20 trainer.max_epochs=20 +trainer.overfit_batches=10 print_config=false
-
-# Test default hydra sweep over hyperparameters (runs 4 different combinations for 1 epoch)
-print_test_name "TEST 7"
-python train.py -m datamodule.batch_size=32,64 model.lr=0.001,0.003 print_config=false \
-trainer.gpus=1 trainer.max_epochs=1
@@ -0,0 +1,97 @@
+#!/bin/bash
+# These are just a couple of quick experiments to test if your model doesn't crash under different conditions
+
+# To execute:
+# bash tests/quick_tests.sh
+
+# Method for printing test name
+echo() {
+    termwidth="$(tput cols)"
+    padding="$(printf '%0.1s' ={1..500})"
+    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$(((termwidth-2-${#1})/2))" "$padding" "$1" 0 "$(((termwidth-1-${#1})/2))" "$padding"
+}
+
+# Make python hide warnings
+export PYTHONWARNINGS="ignore"
+
+
+# Test fast_dev_run (runs for 1 train, 1 val and 1 test batch)
+echo "TEST 1"
+python train.py +trainer.fast_dev_run=True \
+print_config=false
+
+# Overfit to 10 batches
+echo "TEST 2"
+python train.py +trainer.overfit_batches=10 \
+trainer.min_epochs=20 trainer.max_epochs=20 \
+print_config=false
+
+# Test 1 epoch on CPU
+echo "TEST 3"
+python train.py trainer.gpus=0 trainer.max_epochs=1 \
+print_config=false
+
+# Test 1 epoch on GPU
+echo "TEST 4"
+python train.py trainer.gpus=1 trainer.max_epochs=1 \
+print_config=false
+
+# Test on 25% of data
+echo "TEST 5"
+python train.py trainer.max_epochs=1 \
++trainer.limit_train_batches=0.25 +trainer.limit_val_batches=0.25 +trainer.limit_test_batches=0.25 \
+print_config=false
+
+# Test on 15 train batches, 10 val batches, 5 test batches
+echo "TEST 6"
+python train.py trainer.max_epochs=1 \
++trainer.limit_train_batches=15 +trainer.limit_val_batches=10 +trainer.limit_test_batches=5 \
+print_config=false
+
+# Test all experiment configs
+echo "TEST 7"
+python train.py -m '+experiment=glob(*)' trainer.gpus=1 trainer.max_epochs=2 \
+print_config=false
+
+# Test default hydra sweep over hyperparameters (runs 4 different combinations with fast_dev_run)
+echo "TEST 8"
+python train.py -m datamodule.batch_size=32,64 model.lr=0.001,0.003 \
++trainer.fast_dev_run=True \
+print_config=false
+
+# Test multiple workers and cuda pinned memory
+echo "TEST 9"
+python train.py trainer.gpus=1 trainer.max_epochs=2 \
+datamodule.num_workers=4 datamodule.pin_memory=True \
+print_config=false
+
+# Test 16 bit precision
+echo "TEST 10"
+python train.py trainer.gpus=1 trainer.max_epochs=1 +trainer.precision=16 \
+print_config=false
+
+# Test gradient accumulation
+echo "TEST 11"
+python train.py trainer.gpus=1 trainer.max_epochs=1 +trainer.accumulate_grad_batches=10 \
+print_config=false
+
+# Test running validation loop twice per epoch
+echo "TEST 12"
+python train.py trainer.gpus=1 trainer.max_epochs=2 +trainer.val_check_interval=0.5 \
+print_config=false
+
+# Test CSV logger (5 epochs)
+echo "TEST 13"
+python train.py logger=csv_logger trainer.min_epochs=5 trainer.max_epochs=5 trainer.gpus=1 \
+print_config=false
+
+# Test TensorBoard logger (5 epochs)
+echo "TEST 14"
+python train.py logger=tensorboard trainer.min_epochs=5 trainer.max_epochs=5 trainer.gpus=1 \
+print_config=false
+
+# Test mixed-precision training
+echo "TEST 15"
+python train.py trainer.gpus=1 trainer.max_epochs=3 \
++trainer.amp_backend='apex' +trainer.amp_level='O2' \
+print_config=false
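One subtlety in the new script: it shadows the shell builtin `echo` with the banner-printing function, so every `echo "TEST N"` call renders a centered yellow header rather than plain text. If plain output were ever needed in the same script, bash's `builtin` keyword still reaches the original (a sketch, not part of the diff):

    echo "TEST 1"                  # centered, colored banner via the function above
    builtin echo "plain message"   # bypasses the function and uses the real builtin
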
@@ -1,18 +1,20 @@
-# TESTS FOR HYPERPARAMETER SWEEPS
-# TO EXECUTE:
+#!/bin/bash
+# Test hyperparameter sweeps
+
+# To execute:
 # bash tests/sweep_tests.sh
 
-# conda activate testenv
-
 
 # currently there are some issues with running sweeps alongside wandb
 # https://github.com/wandb/client/issues/1314
 # this env variable fixes that
 export WANDB_START_METHOD=thread
+echo() {
+    termwidth="$(tput cols)"
+    padding="$(printf '%0.1s' ={1..500})"
+    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$(((termwidth-2-${#1})/2))" "$padding" "$1" 0 "$(((termwidth-1-${#1})/2))" "$padding"
+}
+
+# Make python hide warnings
+export PYTHONWARNINGS="ignore"
 
 # Test default hydra sweep with wandb logging
-echo TEST 1
+echo "TEST 1"
 python train.py -m datamodule.batch_size=64,128 model.lr=0.001,0.003 \
 +experiment=exp_example_simple \
 trainer.gpus=1 trainer.max_epochs=2 seed=12345 \
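The banner function's `printf` line is dense: `printf '%0.1s' ={1..500}` brace-expands to five hundred `=N` words and keeps only the first character of each, building a long run of `=`, while `%*.*s` takes its width and precision from arguments so the padding is trimmed to center the title within `tput cols` columns. A simplified standalone sketch of the same idea (hypothetical, with a fixed 80-column width):

    banner() {
        pad="$(printf '%0.1s' ={1..500})"
        printf '%*.*s %s %*.*s\n' 0 "$(((80-2-${#1})/2))" "$pad" "$1" 0 "$(((80-1-${#1})/2))" "$pad"
    }
    banner "TEST 1"   # prints '==== ... TEST 1 ... ===='
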
@@ -20,7 +22,7 @@ datamodule.num_workers=12 datamodule.pin_memory=True \
 logger=wandb logger.wandb.project="env_tests" logger.wandb.group="DefaultSweep_MNIST_SimpleDenseNet"
 
 # Test optuna sweep with wandb logging
-echo TEST 2
+echo "TEST 2"
 python train.py -m --config-name config_optuna.yaml \
 +experiment=exp_example_simple \
 trainer.gpus=1 trainer.max_epochs=5 seed=12345 \
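`config_optuna.yaml` itself is not part of this diff, so TEST 2 assumes it exists alongside the default config. When debugging such a sweep, Hydra can print the composed job config without launching any training via its `--cfg` flag (a sketch, assuming the same entry point):

    python train.py --config-name config_optuna.yaml --cfg job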