improve smoke tests
Parent: 497515e40d
Commit: 3249fb45ab
@@ -35,3 +35,13 @@ hydra:
   sweep:
     dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S}
     subdir: ${hydra.job.num}
+
+  job:
+    env_set:
+      # currently there are some issues with running sweeps alongside wandb
+      # https://github.com/wandb/client/issues/1314
+      # this env variable fixes that
+      WANDB_START_METHOD: thread
+
+      # ignore python warnings
+      # PYTHONWARNINGS: ignore
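For context on the new `job.env_set` block: Hydra exports each listed variable into the job's environment at launch, so individual test scripts no longer have to set it themselves. A minimal sketch of the equivalent manual workaround from the shell (assuming the same `train.py` entry point used throughout this diff):

    export WANDB_START_METHOD=thread
    python train.py -m datamodule.batch_size=32,64 logger=wandb
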
@@ -3,7 +3,7 @@ gpus: 0 # set -1 to train on all GPUs available, set 0 to train on CPU only
 min_epochs: 1
 max_epochs: 10
 gradient_clip_val: 0.5
-num_sanity_val_steps: 3
+num_sanity_val_steps: 2
 progress_bar_refresh_rate: 20
 weights_summary: null
 default_root_dir: "lightning_logs/"
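All of these trainer defaults remain overridable per run from the command line, which is how the smoke tests below exercise them; for example, the new sanity-check default could be suppressed for a single run (a sketch using the same `trainer.*` keys as this config):

    python train.py trainer.num_sanity_val_steps=0 trainer.max_epochs=1
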
@@ -1,21 +0,0 @@
-# TESTS FOR DIFFERENT LOGGERS
-# TO EXECUTE:
-# bash tests/logger_tests.sh
-
-# conda activate testenv
-
-# Test CSV logger
-echo TEST 1
-python train.py logger=csv_logger trainer.min_epochs=3 trainer.max_epochs=3 trainer.gpus=1
-
-# Test Weights&Biases logger
-echo TEST 2
-python train.py logger=wandb logger.wandb.project="env_tests" trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1
-
-# Test TensorBoard logger
-echo TEST 3
-python train.py logger=tensorboard trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1
-
-# Test many loggers at once
-echo TEST 4
-python train.py logger=many_loggers trainer.min_epochs=10 trainer.max_epochs=10 trainer.gpus=1
@@ -1,44 +0,0 @@
-# THESE ARE JUST A COUPLE OF QUICK EXPERIMENTS TO TEST IF YOUR MODEL DOESN'T CRASH UNDER DIFFERENT CONDITIONS
-# TO EXECUTE:
-# bash tests/quick_tests.sh
-
-# conda activate testenv
-
-export PYTHONWARNINGS="ignore"
-
-print_test_name() {
-    termwidth="$(tput cols)"
-    padding="$(printf '%0.1s' ={1..500})"
-    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$(((termwidth-2-${#1})/2))" "$padding" "$1" 0 "$(((termwidth-1-${#1})/2))" "$padding"
-}
-
-
-# Test for CPU
-print_test_name "TEST 1"
-python train.py trainer.gpus=0 trainer.max_epochs=1 print_config=false
-
-# Test for GPU
-print_test_name "TEST 2"
-python train.py trainer.gpus=1 trainer.max_epochs=1 print_config=false
-
-# Test multiple workers and cuda pinned memory
-print_test_name "TEST 3"
-python train.py trainer.gpus=1 trainer.max_epochs=2 print_config=false \
-datamodule.num_workers=4 datamodule.pin_memory=True
-
-# Test all experiment configs
-print_test_name "TEST 4"
-python train.py -m '+experiment=glob(*)' trainer.gpus=1 trainer.max_epochs=3 print_config=false
-
-# Test with debug trainer
-print_test_name "TEST 5"
-python train.py trainer=debug_trainer print_config=false
-
-# Overfit to 10 batches
-print_test_name "TEST 6"
-python train.py trainer.min_epochs=20 trainer.max_epochs=20 +trainer.overfit_batches=10 print_config=false
-
-# Test default hydra sweep over hyperparameters (runs 4 different combinations for 1 epoch)
-print_test_name "TEST 7"
-python train.py -m datamodule.batch_size=32,64 model.lr=0.001,0.003 print_config=false \
-trainer.gpus=1 trainer.max_epochs=1
@@ -0,0 +1,97 @@
+#!/bin/bash
+# These are just a couple of quick experiments to test if your model doesn't crash under different conditions
+
+# To execute:
+# bash tests/quick_tests.sh
+
+# Method for printing test name
+echo() {
+    termwidth="$(tput cols)"
+    padding="$(printf '%0.1s' ={1..500})"
+    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$(((termwidth-2-${#1})/2))" "$padding" "$1" 0 "$(((termwidth-1-${#1})/2))" "$padding"
+}
+
+# Make python hide warnings
+export PYTHONWARNINGS="ignore"
+
+
+# Test fast_dev_run (runs for 1 train, 1 val and 1 test batch)
+echo "TEST 1"
+python train.py +trainer.fast_dev_run=True \
+print_config=false
+
+# Overfit to 10 batches
+echo "TEST 2"
+python train.py +trainer.overfit_batches=10 \
+trainer.min_epochs=20 trainer.max_epochs=20 \
+print_config=false
+
+# Test 1 epoch on CPU
+echo "TEST 3"
+python train.py trainer.gpus=0 trainer.max_epochs=1 \
+print_config=false
+
+# Test 1 epoch on GPU
+echo "TEST 4"
+python train.py trainer.gpus=1 trainer.max_epochs=1 \
+print_config=false
+
+# Test on 25% of data
+echo "TEST 5"
+python train.py trainer.max_epochs=1 \
++trainer.limit_train_batches=0.25 +trainer.limit_val_batches=0.25 +trainer.limit_test_batches=0.25 \
+print_config=false
+
+# Test on 15 train batches, 10 val batches, 5 test batches
+echo "TEST 6"
+python train.py trainer.max_epochs=1 \
++trainer.limit_train_batches=15 +trainer.limit_val_batches=10 +trainer.limit_test_batches=5 \
+print_config=false
+
+# Test all experiment configs
+echo "TEST 7"
+python train.py -m '+experiment=glob(*)' trainer.gpus=1 trainer.max_epochs=2 \
+print_config=false
+
+# Test default hydra sweep over hyperparameters (runs 4 different combinations with fast_dev_run)
+echo "TEST 8"
+python train.py -m datamodule.batch_size=32,64 model.lr=0.001,0.003 \
++trainer.fast_dev_run=True \
+print_config=false
+
+# Test multiple workers and cuda pinned memory
+echo "TEST 9"
+python train.py trainer.gpus=1 trainer.max_epochs=2 \
+datamodule.num_workers=4 datamodule.pin_memory=True \
+print_config=false
+
+# Test 16 bit precision
+echo "TEST 10"
+python train.py trainer.gpus=1 trainer.max_epochs=1 +trainer.precision=16 \
+print_config=false
+
+# Test gradient accumulation
+echo "TEST 11"
+python train.py trainer.gpus=1 trainer.max_epochs=1 +trainer.accumulate_grad_batches=10 \
+print_config=false
+
+# Test running validation loop twice per epoch
+echo "TEST 12"
+python train.py trainer.gpus=1 trainer.max_epochs=2 +trainer.val_check_interval=0.5 \
+print_config=false
+
+# Test CSV logger (5 epochs)
+echo "TEST 13"
+python train.py logger=csv_logger trainer.min_epochs=5 trainer.max_epochs=5 trainer.gpus=1 \
+print_config=false
+
+# Test TensorBoard logger (5 epochs)
+echo "TEST 14"
+python train.py logger=tensorboard trainer.min_epochs=5 trainer.max_epochs=5 trainer.gpus=1 \
+print_config=false
+
+# Test mixed-precision training
+echo "TEST 15"
+python train.py trainer.gpus=1 trainer.max_epochs=3 \
++trainer.amp_backend='apex' +trainer.amp_level='O2' \
+print_config=false
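One subtlety in the new script: it shadows the shell builtin `echo` with the banner-printing function, so every `echo "TEST N"` call renders a centered yellow header rather than plain text. If plain output were ever needed in the same script, bash's `builtin` keyword still reaches the original (a sketch, not part of the diff):

    echo "TEST 1"                  # centered, colored banner via the function above
    builtin echo "plain message"   # bypasses the function and uses the real builtin
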
@@ -1,18 +1,20 @@
-# TESTS FOR HYPERPARAMETER SWEEPS
-# TO EXECUTE:
+#!/bin/bash
+# Test hyperparameter sweeps
+
+# To execute:
 # bash tests/sweep_tests.sh
 
-# conda activate testenv
-
 
 # currently there are some issues with running sweeps alongside wandb
 # https://github.com/wandb/client/issues/1314
 # this env variable fixes that
 export WANDB_START_METHOD=thread
+echo() {
+    termwidth="$(tput cols)"
+    padding="$(printf '%0.1s' ={1..500})"
+    printf '\e[33m%*.*s %s %*.*s\n\e[0m' 0 "$(((termwidth-2-${#1})/2))" "$padding" "$1" 0 "$(((termwidth-1-${#1})/2))" "$padding"
+}
+
+# Make python hide warnings
+export PYTHONWARNINGS="ignore"
 
 # Test default hydra sweep with wandb logging
-echo TEST 1
+echo "TEST 1"
 python train.py -m datamodule.batch_size=64,128 model.lr=0.001,0.003 \
 +experiment=exp_example_simple \
 trainer.gpus=1 trainer.max_epochs=2 seed=12345 \
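The banner function's `printf` line is dense: `printf '%0.1s' ={1..500}` brace-expands to five hundred `=N` words and keeps only the first character of each, building a long run of `=`, while `%*.*s` takes its width and precision from arguments so the padding is trimmed to center the title within `tput cols` columns. A simplified standalone sketch of the same idea (hypothetical, with a fixed 80-column width):

    banner() {
        pad="$(printf '%0.1s' ={1..500})"
        printf '%*.*s %s %*.*s\n' 0 "$(((80-2-${#1})/2))" "$pad" "$1" 0 "$(((80-1-${#1})/2))" "$pad"
    }
    banner "TEST 1"   # prints '==== ... TEST 1 ... ===='
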
@@ -20,7 +22,7 @@ datamodule.num_workers=12 datamodule.pin_memory=True \
 logger=wandb logger.wandb.project="env_tests" logger.wandb.group="DefaultSweep_MNIST_SimpleDenseNet"
 
 # Test optuna sweep with wandb logging
-echo TEST 2
+echo "TEST 2"
 python train.py -m --config-name config_optuna.yaml \
 +experiment=exp_example_simple \
 trainer.gpus=1 trainer.max_epochs=5 seed=12345 \
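`config_optuna.yaml` itself is not part of this diff, so TEST 2 assumes it exists alongside the default config. When debugging such a sweep, Hydra can print the composed job config without launching any training via its `--cfg` flag (a sketch, assuming the same entry point):

    python train.py --config-name config_optuna.yaml --cfg job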