CLI - Support custom output directory (#110)

* Support custom output directory.
* Update document.
This commit is contained in:
Yifan Xiong 2021-07-01 21:10:12 +08:00 committed by GitHub
Parent 2710fad5de
Commit 7b0b0e9add
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 103 additions and 60 deletions

View file

@@ -35,6 +35,7 @@ sb deploy [--docker-image]
[--host-list]
[--host-password]
[--host-username]
[--output-dir]
[--private-key]
```
@@ -49,6 +50,7 @@ sb deploy [--docker-image]
| `--host-list` `-l` | `None` | Comma separated host list. |
| `--host-password` | `None` | Host password or key passphrase if needed. |
| `--host-username` | `None` | Host username if needed. |
| `--output-dir` | `None` | Path to output directory; outputs/{datetime} will be used if not specified. |
| `--private-key` | `None` | Path to private key if needed. |
#### Global arguments
@@ -70,6 +72,7 @@ Execute the SuperBench benchmarks locally.
```bash title="SB CLI"
sb exec [--config-file]
[--config-override]
[--output-dir]
```
#### Optional arguments
@@ -78,6 +81,7 @@ sb exec [--config-file]
| --- | --- | --- |
| `--config-file` `-c` | `None` | Path to SuperBench config file. |
| `--config-override` `-C` | `None` | Extra arguments to override config_file. |
| `--output-dir` | `None` | Path to output directory; outputs/{datetime} will be used if not specified. |
#### Global arguments
@@ -105,6 +109,7 @@ sb run [--config-file]
[--host-list]
[--host-password]
[--host-username]
[--output-dir]
[--private-key]
```
@@ -121,6 +126,7 @@ sb run [--config-file]
| `--host-list` `-l` | `None` | Comma separated host list. |
| `--host-password` | `None` | Host password or key passphrase if needed. |
| `--host-username` | `None` | Host username if needed. |
| `--output-dir` | `None` | Path to output directory; outputs/{datetime} will be used if not specified. |
| `--private-key` | `None` | Path to private key if needed. |
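As a quick illustration of the new flag (host names, config file, and paths below are hypothetical, not taken from this commit), all three commands accept the same option and write artifacts to the given directory instead of an auto-generated outputs/{datetime} one:

```bash title="SB CLI"
# Reuse one fixed directory across deploy, run, and exec (illustrative values).
sb deploy --host-list node0,node1 --output-dir outputs/demo
sb run --host-list node0,node1 --output-dir outputs/demo
sb exec --config-file resnet.yaml --output-dir outputs/demo
```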
#### Global arguments

View file

@@ -41,6 +41,11 @@ class SuperBenchCommandsLoader(CLICommandsLoader):
ac.argument('host_list', options_list=('--host-list', '-l'), type=str, help='Comma separated host list.')
ac.argument('host_username', type=str, help='Host username if needed.')
ac.argument('host_password', type=str, help='Host password or key passphrase if needed.')
ac.argument(
'output_dir',
type=str,
help='Path to output directory; outputs/{datetime} will be used if not specified.'
)
ac.argument('private_key', type=str, help='Path to private key if needed.')
ac.argument(
'config_file', options_list=('--config-file', '-c'), type=str, help='Path to SuperBench config file.'

View file

@@ -11,7 +11,7 @@ from omegaconf import OmegaConf
import superbench
from superbench.runner import SuperBenchRunner
from superbench.executor import SuperBenchExecutor
from superbench.common.utils import create_output_dir, get_sb_config
from superbench.common.utils import create_sb_output_dir, get_sb_config
def check_argument_file(name, file):
@@ -62,13 +62,14 @@ def split_docker_domain(name):
return domain, remainder
def process_config_arguments(config_file=None, config_override=None):
def process_config_arguments(config_file=None, config_override=None, output_dir=None):
"""Process configuration arguments.
Args:
config_file (str, optional): Path to SuperBench config file. Defaults to None.
config_override (str, optional): Extra arguments to override config_file,
following [Hydra syntax](https://hydra.cc/docs/advanced/override_grammar/basic). Defaults to None.
output_dir (str, optional): Path to output directory. Defaults to None.
Returns:
DictConfig: SuperBench config object.
@@ -86,9 +87,9 @@ def process_config_arguments(config_file=None, config_override=None):
sb_config = OmegaConf.merge(sb_config, sb_config_from_override)
# Create output directory
output_dir = create_output_dir()
sb_output_dir = create_sb_output_dir(output_dir)
return sb_config, output_dir
return sb_config, sb_output_dir
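A minimal usage sketch of the updated helper, as called from within the CLI handler module where it is defined (the config file name and directory are illustrative assumptions):

```python
# Hypothetical call; sb_output_dir is the directory string passed in,
# which create_sb_output_dir has already created on disk.
sb_config, sb_output_dir = process_config_arguments(
    config_file='default.yaml',
    output_dir='outputs/demo',
)
```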
def process_runner_arguments(
@@ -100,6 +101,7 @@ def process_runner_arguments(
host_username=None,
host_password=None,
private_key=None,
output_dir=None,
config_file=None,
config_override=None
):
@@ -114,6 +116,7 @@ def process_runner_arguments(
host_username (str, optional): Host username if needed. Defaults to None.
host_password (str, optional): Host password or key passphrase if needed. Defaults to None.
private_key (str, optional): Path to private key if needed. Defaults to None.
output_dir (str, optional): Path to output directory. Defaults to None.
config_file (str, optional): Path to SuperBench config file. Defaults to None.
config_override (str, optional): Extra arguments to override config_file,
following [Hydra syntax](https://hydra.cc/docs/advanced/override_grammar/basic). Defaults to None.
@@ -154,9 +157,13 @@ def process_runner_arguments(
}
)
sb_config, output_dir = process_config_arguments(config_file=config_file, config_override=config_override)
sb_config, sb_output_dir = process_config_arguments(
config_file=config_file,
config_override=config_override,
output_dir=output_dir,
)
return docker_config, ansible_config, sb_config, output_dir
return docker_config, ansible_config, sb_config, sb_output_dir
def version_command_handler():
@@ -168,20 +175,25 @@ def version_command_handler():
return superbench.__version__
def exec_command_handler(config_file=None, config_override=None):
def exec_command_handler(config_file=None, config_override=None, output_dir=None):
"""Run the SuperBench benchmarks locally.
Args:
config_file (str, optional): Path to SuperBench config file. Defaults to None.
config_override (str, optional): Extra arguments to override config_file,
following [Hydra syntax](https://hydra.cc/docs/advanced/override_grammar/basic). Defaults to None.
output_dir (str, optional): Path to output directory. Defaults to None.
Raises:
CLIError: If input arguments are invalid.
"""
sb_config, output_dir = process_config_arguments(config_file=config_file, config_override=config_override)
sb_config, sb_output_dir = process_config_arguments(
config_file=config_file,
config_override=config_override,
output_dir=output_dir,
)
executor = SuperBenchExecutor(sb_config, output_dir)
executor = SuperBenchExecutor(sb_config, sb_output_dir)
executor.exec()
@@ -193,6 +205,7 @@ def deploy_command_handler(
host_list=None,
host_username=None,
host_password=None,
output_dir=None,
private_key=None
):
"""Deploy the SuperBench environments to all given nodes.
@@ -211,12 +224,13 @@ def deploy_command_handler(
host_list (str, optional): Comma separated host list. Defaults to None.
host_username (str, optional): Host username if needed. Defaults to None.
host_password (str, optional): Host password or key passphrase if needed. Defaults to None.
output_dir (str, optional): Path to output directory. Defaults to None.
private_key (str, optional): Path to private key if needed. Defaults to None.
Raises:
CLIError: If input arguments are invalid.
"""
docker_config, ansible_config, sb_config, output_dir = process_runner_arguments(
docker_config, ansible_config, sb_config, sb_output_dir = process_runner_arguments(
docker_image=docker_image,
docker_username=docker_username,
docker_password=docker_password,
@@ -224,10 +238,11 @@ def deploy_command_handler(
host_list=host_list,
host_username=host_username,
host_password=host_password,
output_dir=output_dir,
private_key=private_key,
)
runner = SuperBenchRunner(sb_config, docker_config, ansible_config, output_dir)
runner = SuperBenchRunner(sb_config, docker_config, ansible_config, sb_output_dir)
runner.deploy()
@@ -239,6 +254,7 @@ def run_command_handler(
host_list=None,
host_username=None,
host_password=None,
output_dir=None,
private_key=None,
config_file=None,
config_override=None
@@ -255,6 +271,7 @@ def run_command_handler(
host_list (str, optional): Comma separated host list. Defaults to None.
host_username (str, optional): Host username if needed. Defaults to None.
host_password (str, optional): Host password or key passphrase if needed. Defaults to None.
output_dir (str, optional): Path to output directory. Defaults to None.
private_key (str, optional): Path to private key if needed. Defaults to None.
config_file (str, optional): Path to SuperBench config file. Defaults to None.
config_override (str, optional): Extra arguments to override config_file,
@@ -263,7 +280,7 @@ def run_command_handler(
Raises:
CLIError: If input arguments are invalid.
"""
docker_config, ansible_config, sb_config, output_dir = process_runner_arguments(
docker_config, ansible_config, sb_config, sb_output_dir = process_runner_arguments(
docker_image=docker_image,
docker_username=docker_username,
docker_password=docker_password,
@@ -271,10 +288,11 @@ def run_command_handler(
host_list=host_list,
host_username=host_username,
host_password=host_password,
output_dir=output_dir,
private_key=private_key,
config_file=config_file,
config_override=config_override,
)
runner = SuperBenchRunner(sb_config, docker_config, ansible_config, output_dir)
runner = SuperBenchRunner(sb_config, docker_config, ansible_config, sb_output_dir)
runner.run()

View file

@@ -4,9 +4,9 @@
"""Exposes the interface of SuperBench common utilities."""
from superbench.common.utils.logging import SuperBenchLogger, logger
from superbench.common.utils.file_handler import create_output_dir, get_sb_config
from superbench.common.utils.file_handler import create_sb_output_dir, get_sb_config
from superbench.common.utils.lazy_import import LazyImport
nv_helper = LazyImport('superbench.common.utils.nvidia_helper')
__all__ = ['SuperBenchLogger', 'logger', 'create_output_dir', 'get_sb_config', 'LazyImport', 'nv_helper']
__all__ = ['SuperBenchLogger', 'logger', 'create_sb_output_dir', 'get_sb_config', 'LazyImport', 'nv_helper']

View file

@@ -10,18 +10,22 @@ import yaml
from omegaconf import OmegaConf
def create_output_dir():
"""Create a new output directory.
def create_sb_output_dir(output_dir=None):
"""Create output directory.
Generate a new output directory name based on current time and create it on filesystem.
Create output directory on filesystem, generate a new name based on current time if not provided.
Args:
output_dir (str, optional): Output directory. Defaults to None.
Returns:
str: Output directory name.
str: Given or generated output directory.
"""
output_name = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
output_path = Path('.', 'outputs', output_name).resolve()
if not output_dir:
output_dir = str(Path('outputs', datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
output_path = Path(output_dir).expanduser().resolve()
output_path.mkdir(mode=0o755, parents=True, exist_ok=True)
return str(output_path)
return output_dir
def get_sb_config(config_file):
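A brief usage sketch of the new helper (directory names are illustrative):

```python
from superbench.common.utils import create_sb_output_dir

# Custom path: created on disk (parents included) and returned as given.
out = create_sb_output_dir('outputs/demo')   # -> 'outputs/demo'

# No argument: a timestamped directory such as outputs/2021-07-01_21-10-12
# is generated, created, and returned.
out = create_sb_output_dir()
```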

View file

@@ -15,19 +15,20 @@ from superbench.common.utils import SuperBenchLogger, logger
class SuperBenchExecutor():
"""SuperBench executor class."""
def __init__(self, sb_config, output_dir):
def __init__(self, sb_config, sb_output_dir):
"""Initilize.
Args:
sb_config (DictConfig): SuperBench config object.
output_dir (str): Dir for output.
sb_output_dir (str): SuperBench output directory.
"""
self._sb_config = sb_config
self._output_dir = output_dir
self._sb_output_dir = sb_output_dir
self._output_path = Path(sb_output_dir).expanduser().resolve()
self.__set_logger('sb-exec.log')
logger.info('Executor uses config: %s.', self._sb_config)
logger.info('Executor writes to: %s.', self._output_dir)
logger.info('Executor writes to: %s.', str(self._output_path))
self.__validate_sb_config()
self._sb_benchmarks = self._sb_config.superbench.benchmarks
@@ -40,7 +41,7 @@ class SuperBenchExecutor():
Args:
filename (str): Log file name.
"""
SuperBenchLogger.add_handler(logger.logger, filename=str(Path(self._output_dir) / filename))
SuperBenchLogger.add_handler(logger.logger, filename=str(self._output_path / filename))
def __validate_sb_config(self):
"""Validate SuperBench config object.
@@ -127,7 +128,7 @@ class SuperBenchExecutor():
Args:
benchmark_name (str): Benchmark name.
"""
benchmark_output_dir = Path(self._output_dir, 'benchmarks', benchmark_name)
benchmark_output_dir = self._output_path / 'benchmarks' / benchmark_name
if benchmark_output_dir.is_dir() and any(benchmark_output_dir.iterdir()):
logger.warning('Benchmark output directory %s is not empty.', str(benchmark_output_dir))
for i in itertools.count(start=1):
@@ -144,7 +145,7 @@ class SuperBenchExecutor():
benchmark_name (str): Benchmark name.
benchmark_results (dict): Benchmark results.
"""
with Path(self._output_dir, 'benchmarks', benchmark_name, 'results.json').open(mode='w') as f:
with (self._output_path / 'benchmarks' / benchmark_name / 'results.json').open(mode='w') as f:
json.dump(benchmark_results, f, indent=2)
def exec(self):
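For reference, a small sketch of how these paths compose (the directory and benchmark name are hypothetical):

```python
from pathlib import Path

# Mirrors the executor's handling above: resolve the output directory once,
# then join per-benchmark paths from it.
output_path = Path('outputs/demo').expanduser().resolve()
results_file = output_path / 'benchmarks' / 'kernel-launch' / 'results.json'
print(results_file)  # e.g. /home/user/outputs/demo/benchmarks/kernel-launch/results.json
```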

View file

@@ -15,24 +15,25 @@ from superbench.runner.ansible import AnsibleClient
class SuperBenchRunner():
"""SuperBench runner class."""
def __init__(self, sb_config, docker_config, ansible_config, output_dir):
def __init__(self, sb_config, docker_config, ansible_config, sb_output_dir):
"""Initilize.
Args:
sb_config (DictConfig): SuperBench config object.
docker_config (DictConfig): Docker config object.
ansible_config (DictConfig): Ansible config object.
output_dir (str): Dir for output.
sb_output_dir (str): SuperBench output directory.
"""
self._sb_config = sb_config
self._docker_config = docker_config
self._ansible_config = ansible_config
self._output_dir = output_dir
self._sb_output_dir = sb_output_dir
self._output_path = Path(sb_output_dir).expanduser().resolve()
self._ansible_client = AnsibleClient(ansible_config)
self.__set_logger('sb-run.log')
logger.info('Runner uses config: %s.', self._sb_config)
logger.info('Runner writes to: %s.', self._output_dir)
logger.info('Runner writes to: %s.', str(self._output_path))
self._sb_benchmarks = self._sb_config.superbench.benchmarks
self.__validate_sb_config()
@@ -45,7 +46,7 @@ class SuperBenchRunner():
Args:
filename (str): Log file name.
"""
SuperBenchLogger.add_handler(logger.logger, filename=str(Path(self._output_dir) / filename))
SuperBenchLogger.add_handler(logger.logger, filename=str(self._output_path / filename))
def __validate_sb_config(self):
"""Validate SuperBench config object.
@@ -92,7 +93,10 @@ class SuperBenchRunner():
Returns:
str: Runner command.
"""
exec_command = ('sb exec -c sb.config.yaml -C superbench.enable={name}').format(name=benchmark_name)
exec_command = ('sb exec --output-dir {output_dir} -c sb.config.yaml -C superbench.enable={name}').format(
name=benchmark_name,
output_dir=self._sb_output_dir,
)
mode_command = exec_command
if mode.name == 'local':
mode_command = '{prefix} {command}'.format(
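With this change, the base command generated for a hypothetical benchmark foo and output directory outputs/demo becomes:

```bash
sb exec --output-dir outputs/demo -c sb.config.yaml -C superbench.enable=foo
```

Mode-specific prefixes (e.g. CUDA_VISIBLE_DEVICES, torch.distributed.launch) are then prepended to this string, as the updated tests below verify.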
@@ -124,7 +128,7 @@ class SuperBenchRunner():
logger.info('Preparing SuperBench environment.')
extravars = {
'ssh_port': random.randint(1 << 14, (1 << 15) - 1),
'output_dir': self._output_dir,
'output_dir': str(self._output_path),
'docker_image': self._docker_config.image,
'gpu_vendor': 'nvidia',
}
@@ -141,12 +145,12 @@
def check_env(self): # pragma: no cover
"""Check SuperBench environment."""
logger.info('Checking SuperBench environment.')
OmegaConf.save(config=self._sb_config, f=str(Path(self._output_dir) / 'sb.config.yaml'))
OmegaConf.save(config=self._sb_config, f=str(self._output_path / 'sb.config.yaml'))
self._ansible_client.run(
self._ansible_client.get_playbook_config(
'check_env.yaml',
extravars={
'output_dir': self._output_dir,
'output_dir': str(self._output_path),
'env': '\n'.join(f'{k}={v}' for k, v in self._sb_config.superbench.env.items()),
}
)

View file

@@ -27,17 +27,17 @@ class ExecutorTestCase(unittest.TestCase):
default_config_file = Path(__file__).parent / '../../superbench/config/default.yaml'
with default_config_file.open() as fp:
self.default_config = OmegaConf.create(yaml.load(fp, Loader=yaml.SafeLoader))
self.output_dir = tempfile.mkdtemp()
self.sb_output_dir = tempfile.mkdtemp()
self.executor = SuperBenchExecutor(self.default_config, self.output_dir)
self.executor = SuperBenchExecutor(self.default_config, self.sb_output_dir)
def tearDown(self):
"""Hook method for deconstructing the test fixture after testing it."""
shutil.rmtree(self.output_dir)
shutil.rmtree(self.sb_output_dir)
def test_set_logger(self):
"""Test log file exists."""
expected_log_file = Path(self.executor._output_dir) / 'sb-exec.log'
expected_log_file = Path(self.executor._sb_output_dir) / 'sb-exec.log'
self.assertTrue(expected_log_file.is_file())
def test_get_enabled_benchmarks_enable_none(self):
@@ -92,7 +92,7 @@ class ExecutorTestCase(unittest.TestCase):
def test_create_benchmark_dir(self):
"""Test __create_benchmark_dir."""
foo_path = Path(self.output_dir, 'benchmarks', 'foo')
foo_path = Path(self.sb_output_dir, 'benchmarks', 'foo')
self.executor._SuperBenchExecutor__create_benchmark_dir('foo')
self.assertTrue(foo_path.is_dir())
self.assertFalse(any(foo_path.iterdir()))
@@ -115,7 +115,7 @@ class ExecutorTestCase(unittest.TestCase):
def test_write_benchmark_results(self):
"""Test __write_benchmark_results."""
foobar_path = Path(self.output_dir, 'benchmarks', 'foobar')
foobar_path = Path(self.sb_output_dir, 'benchmarks', 'foobar')
foobar_results_path = foobar_path / 'results.json'
self.executor._SuperBenchExecutor__create_benchmark_dir('foobar')
foobar_results = {
@@ -142,7 +142,7 @@ class ExecutorTestCase(unittest.TestCase):
mock_exec_benchmark.return_value = {}
self.executor.exec()
self.assertTrue(Path(self.output_dir, 'benchmarks').is_dir())
self.assertTrue(Path(self.sb_output_dir, 'benchmarks').is_dir())
for benchmark_name in self.executor._sb_benchmarks:
self.assertTrue(Path(self.output_dir, 'benchmarks', benchmark_name).is_dir())
self.assertTrue(Path(self.output_dir, 'benchmarks', benchmark_name, 'results.json').is_file())
self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name).is_dir())
self.assertTrue(Path(self.sb_output_dir, 'benchmarks', benchmark_name, 'results.json').is_file())

View file

@@ -22,17 +22,17 @@ class RunnerTestCase(unittest.TestCase):
default_config_file = Path(__file__).parent / '../../superbench/config/default.yaml'
with default_config_file.open() as fp:
self.default_config = OmegaConf.create(yaml.load(fp, Loader=yaml.SafeLoader))
self.output_dir = tempfile.mkdtemp()
self.sb_output_dir = tempfile.mkdtemp()
self.runner = SuperBenchRunner(self.default_config, None, None, self.output_dir)
self.runner = SuperBenchRunner(self.default_config, None, None, self.sb_output_dir)
def tearDown(self):
"""Hook method for deconstructing the test fixture after testing it."""
shutil.rmtree(self.output_dir)
shutil.rmtree(self.sb_output_dir)
def test_set_logger(self):
"""Test log file exists."""
expected_log_file = Path(self.runner._output_dir) / 'sb-run.log'
expected_log_file = Path(self.runner._sb_output_dir) / 'sb-run.log'
self.assertTrue(expected_log_file.is_file())
def test_get_mode_command(self):
@@ -43,7 +43,8 @@ class RunnerTestCase(unittest.TestCase):
'mode': {
'name': 'non_exist',
},
'expected_command': 'sb exec -c sb.config.yaml -C superbench.enable=foo',
'expected_command':
f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo',
},
{
'benchmark_name': 'foo',
@@ -52,7 +53,8 @@ class RunnerTestCase(unittest.TestCase):
'proc_num': 1,
'prefix': '',
},
'expected_command': 'sb exec -c sb.config.yaml -C superbench.enable=foo',
'expected_command':
f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo',
},
{
'benchmark_name':
@@ -63,19 +65,22 @@ class RunnerTestCase(unittest.TestCase):
'proc_rank': 6,
'prefix': 'CUDA_VISIBLE_DEVICES={proc_rank} numactl -c $(({proc_rank}/2))'
},
'expected_command':
('CUDA_VISIBLE_DEVICES=6 numactl -c $((6/2)) '
'sb exec -c sb.config.yaml -C superbench.enable=foo'),
'expected_command': (
'CUDA_VISIBLE_DEVICES=6 numactl -c $((6/2)) '
f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo'
),
},
{
'benchmark_name': 'foo',
'benchmark_name':
'foo',
'mode': {
'name': 'local',
'proc_num': 16,
'proc_rank': 1,
'prefix': 'RANK={proc_rank} NUM={proc_num}'
},
'expected_command': 'RANK=1 NUM=16 sb exec -c sb.config.yaml -C superbench.enable=foo',
'expected_command':
f'RANK=1 NUM=16 sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo',
},
{
'benchmark_name':
@@ -90,7 +95,7 @@ class RunnerTestCase(unittest.TestCase):
'--use_env --no_python --nproc_per_node=1 '
'--nnodes=$NNODES --node_rank=$NODE_RANK '
'--master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
'sb exec -c sb.config.yaml -C superbench.enable=foo '
f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo '
'superbench.benchmarks.foo.parameters.distributed_impl=ddp '
'superbench.benchmarks.foo.parameters.distributed_backend=nccl'
),
@@ -108,7 +113,7 @@ class RunnerTestCase(unittest.TestCase):
'--use_env --no_python --nproc_per_node=8 '
'--nnodes=1 --node_rank=$NODE_RANK '
'--master_addr=$MASTER_ADDR --master_port=$MASTER_PORT '
'sb exec -c sb.config.yaml -C superbench.enable=foo '
f'sb exec --output-dir {self.sb_output_dir} -c sb.config.yaml -C superbench.enable=foo '
'superbench.benchmarks.foo.parameters.distributed_impl=ddp '
'superbench.benchmarks.foo.parameters.distributed_backend=nccl'
),